1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdmplex.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 #include <petsc/private/dmpleximpl.h> 8 9 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 10 11 /* if range is true, it returns B s.t. span{B} = range(A) 12 if range is false, it returns B s.t. range(B) _|_ range(A) */ 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 89 { 90 PetscErrorCode ierr; 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 97 if (!esize) PetscFunctionReturn(0); 98 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 99 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 105 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 106 ierr = MatDestroy(&GE);CHKERRQ(ierr); 107 108 /* constants */ 109 ptr += rsize*csize; 110 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 111 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 112 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 113 ierr = MatDestroy(&GE);CHKERRQ(ierr); 114 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 115 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 116 117 if (corners) { 118 Mat GEc; 119 PetscScalar *vals,v; 120 121 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 122 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 123 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 124 /* v = PetscAbsScalar(vals[0]) */; 125 v = 1.; 126 cvals[0] = vals[0]/v; 127 cvals[1] = vals[1]/v; 128 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 129 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 130 #if defined(PRINT_GDET) 131 { 132 PetscViewer viewer; 133 char filename[256]; 134 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 135 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 136 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 138 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 140 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 142 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 143 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 144 } 145 #endif 146 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 147 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 156 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 157 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 158 Vec tvec; 159 PetscSF sfv; 160 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 161 MPI_Comm comm; 162 IS lned,primals,allprimals,nedfieldlocal; 163 IS *eedges,*extrows,*extcols,*alleedges; 164 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 165 PetscScalar *vals,*work; 166 PetscReal *rwork; 167 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 168 PetscInt ne,nv,Lv,order,n,field; 169 PetscInt n_neigh,*neigh,*n_shared,**shared; 170 PetscInt i,j,extmem,cum,maxsize,nee; 171 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 172 PetscInt *sfvleaves,*sfvroots; 173 PetscInt *corners,*cedges; 174 PetscInt *ecount,**eneighs,*vcount,**vneighs; 175 #if defined(PETSC_USE_DEBUG) 176 PetscInt *emarks; 177 #endif 178 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 /* If the discrete gradient is defined for a subset of dofs and global is true, 183 it assumes G is given in global ordering for all the dofs. 184 Otherwise, the ordering is global for the Nedelec field */ 185 order = pcbddc->nedorder; 186 conforming = pcbddc->conforming; 187 field = pcbddc->nedfield; 188 global = pcbddc->nedglobal; 189 setprimal = PETSC_FALSE; 190 print = PETSC_FALSE; 191 singular = PETSC_FALSE; 192 193 /* Command line customization */ 194 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 198 /* print debug info TODO: to be removed */ 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 201 202 /* Return if there are no edges in the decomposition and the problem is not singular */ 203 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 204 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 205 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 206 if (!singular) { 207 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 208 lrc[0] = PETSC_FALSE; 209 for (i=0;i<n;i++) { 210 if (PetscRealPart(vals[i]) > 2.) { 211 lrc[0] = PETSC_TRUE; 212 break; 213 } 214 } 215 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 216 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 217 if (!lrc[1]) PetscFunctionReturn(0); 218 } 219 220 /* Get Nedelec field */ 221 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 235 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 322 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 456 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 458 for (i=1,cum=0;i<n_neigh;i++) { 459 cum += n_shared[i]; 460 for (j=0;j<n_shared[i];j++) { 461 ecount[shared[i][j]]++; 462 } 463 } 464 if (ne) { 465 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 466 } 467 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 468 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 469 for (i=1;i<n_neigh;i++) { 470 for (j=0;j<n_shared[i];j++) { 471 PetscInt k = shared[i][j]; 472 eneighs[k][ecount[k]] = neigh[i]; 473 ecount[k]++; 474 } 475 } 476 for (i=0;i<ne;i++) { 477 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 478 } 479 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 480 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 482 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 483 for (i=1,cum=0;i<n_neigh;i++) { 484 cum += n_shared[i]; 485 for (j=0;j<n_shared[i];j++) { 486 vcount[shared[i][j]]++; 487 } 488 } 489 if (nv) { 490 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 491 } 492 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 493 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 494 for (i=1;i<n_neigh;i++) { 495 for (j=0;j<n_shared[i];j++) { 496 PetscInt k = shared[i][j]; 497 vneighs[k][vcount[k]] = neigh[i]; 498 vcount[k]++; 499 } 500 } 501 for (i=0;i<nv;i++) { 502 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 503 } 504 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 505 506 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 507 for proper detection of coarse edges' endpoints */ 508 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 509 for (i=0;i<ne;i++) { 510 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 511 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 512 } 513 } 514 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 515 if (!conforming) { 516 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 517 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 518 } 519 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 520 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 521 cum = 0; 522 for (i=0;i<ne;i++) { 523 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 524 if (!PetscBTLookup(btee,i)) { 525 marks[cum++] = i; 526 continue; 527 } 528 /* set badly connected edge dofs as primal */ 529 if (!conforming) { 530 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 531 marks[cum++] = i; 532 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 533 for (j=ii[i];j<ii[i+1];j++) { 534 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 535 } 536 } else { 537 /* every edge dofs should be connected trough a certain number of nodal dofs 538 to other edge dofs belonging to coarse edges 539 - at most 2 endpoints 540 - order-1 interior nodal dofs 541 - no undefined nodal dofs (nconn < order) 542 */ 543 PetscInt ends = 0,ints = 0, undef = 0; 544 for (j=ii[i];j<ii[i+1];j++) { 545 PetscInt v = jj[j],k; 546 PetscInt nconn = iit[v+1]-iit[v]; 547 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order -1) { 553 marks[cum++] = i; 554 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 555 for (j=ii[i];j<ii[i+1];j++) { 556 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 562 if (!order && ii[i+1] != ii[i]) { 563 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 564 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 565 } 566 } 567 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 568 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 569 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 570 if (!conforming) { 571 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 572 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 573 } 574 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 575 576 /* identify splitpoints and corner candidates */ 577 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 578 if (print) { 579 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 580 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 581 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 582 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 583 } 584 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 585 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 586 for (i=0;i<nv;i++) { 587 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 588 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 589 if (!order) { /* variable order */ 590 PetscReal vorder = 0.; 591 592 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 593 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 594 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 595 ord = 1; 596 } 597 #if defined(PETSC_USE_DEBUG) 598 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 599 #endif 600 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 601 if (PetscBTLookup(btbd,jj[j])) { 602 bdir = PETSC_TRUE; 603 break; 604 } 605 if (vc != ecount[jj[j]]) { 606 sneighs = PETSC_FALSE; 607 } else { 608 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 609 for (k=0;k<vc;k++) { 610 if (vn[k] != en[k]) { 611 sneighs = PETSC_FALSE; 612 break; 613 } 614 } 615 } 616 } 617 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 618 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 619 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 620 } else if (test == ord) { 621 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 622 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else { 625 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 626 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 627 } 628 } 629 } 630 ierr = PetscFree(ecount);CHKERRQ(ierr); 631 ierr = PetscFree(vcount);CHKERRQ(ierr); 632 if (ne) { 633 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 634 } 635 if (nv) { 636 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 637 } 638 ierr = PetscFree(eneighs);CHKERRQ(ierr); 639 ierr = PetscFree(vneighs);CHKERRQ(ierr); 640 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 641 642 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 643 if (order != 1) { 644 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 645 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 646 for (i=0;i<nv;i++) { 647 if (PetscBTLookup(btvcand,i)) { 648 PetscBool found = PETSC_FALSE; 649 for (j=ii[i];j<ii[i+1] && !found;j++) { 650 PetscInt k,e = jj[j]; 651 if (PetscBTLookup(bte,e)) continue; 652 for (k=iit[e];k<iit[e+1];k++) { 653 PetscInt v = jjt[k]; 654 if (v != i && PetscBTLookup(btvcand,v)) { 655 found = PETSC_TRUE; 656 break; 657 } 658 } 659 } 660 if (!found) { 661 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 662 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 663 } else { 664 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 665 } 666 } 667 } 668 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 669 } 670 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 671 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 672 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 673 674 /* Get the local G^T explicitly */ 675 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 676 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 677 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 678 679 /* Mark interior nodal dofs */ 680 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 681 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 682 for (i=1;i<n_neigh;i++) { 683 for (j=0;j<n_shared[i];j++) { 684 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 685 } 686 } 687 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 688 689 /* communicate corners and splitpoints */ 690 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 691 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 692 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 693 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 694 695 if (print) { 696 IS tbz; 697 698 cum = 0; 699 for (i=0;i<nv;i++) 700 if (sfvleaves[i]) 701 vmarks[cum++] = i; 702 703 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 704 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 705 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 706 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 707 } 708 709 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 710 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 711 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 712 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 713 714 /* Zero rows of lGt corresponding to identified corners 715 and interior nodal dofs */ 716 cum = 0; 717 for (i=0;i<nv;i++) { 718 if (sfvleaves[i]) { 719 vmarks[cum++] = i; 720 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 721 } 722 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 723 } 724 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 725 if (print) { 726 IS tbz; 727 728 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 729 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 730 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 731 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 732 } 733 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 734 ierr = PetscFree(vmarks);CHKERRQ(ierr); 735 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 736 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 737 738 /* Recompute G */ 739 ierr = MatDestroy(&lG);CHKERRQ(ierr); 740 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 741 if (print) { 742 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 743 ierr = MatView(lG,NULL);CHKERRQ(ierr); 744 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 745 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 746 } 747 748 /* Get primal dofs (if any) */ 749 cum = 0; 750 for (i=0;i<ne;i++) { 751 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 752 } 753 if (fl2g) { 754 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 755 } 756 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 757 if (print) { 758 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 759 ierr = ISView(primals,NULL);CHKERRQ(ierr); 760 } 761 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 762 /* TODO: what if the user passed in some of them ? */ 763 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 764 ierr = ISDestroy(&primals);CHKERRQ(ierr); 765 766 /* Compute edge connectivity */ 767 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 768 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 769 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 770 if (fl2g) { 771 PetscBT btf; 772 PetscInt *iia,*jja,*iiu,*jju; 773 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 774 775 /* create CSR for all local dofs */ 776 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 777 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 778 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 779 iiu = pcbddc->mat_graph->xadj; 780 jju = pcbddc->mat_graph->adjncy; 781 } else if (pcbddc->use_local_adj) { 782 rest = PETSC_TRUE; 783 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 784 } else { 785 free = PETSC_TRUE; 786 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 787 iiu[0] = 0; 788 for (i=0;i<n;i++) { 789 iiu[i+1] = i+1; 790 jju[i] = -1; 791 } 792 } 793 794 /* import sizes of CSR */ 795 iia[0] = 0; 796 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 797 798 /* overwrite entries corresponding to the Nedelec field */ 799 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 800 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 801 for (i=0;i<ne;i++) { 802 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 803 iia[idxs[i]+1] = ii[i+1]-ii[i]; 804 } 805 806 /* iia in CSR */ 807 for (i=0;i<n;i++) iia[i+1] += iia[i]; 808 809 /* jja in CSR */ 810 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 811 for (i=0;i<n;i++) 812 if (!PetscBTLookup(btf,i)) 813 for (j=0;j<iiu[i+1]-iiu[i];j++) 814 jja[iia[i]+j] = jju[iiu[i]+j]; 815 816 /* map edge dofs connectivity */ 817 if (jj) { 818 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 819 for (i=0;i<ne;i++) { 820 PetscInt e = idxs[i]; 821 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 822 } 823 } 824 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 825 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 826 if (rest) { 827 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 828 } 829 if (free) { 830 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 831 } 832 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 833 } else { 834 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 835 } 836 837 /* Analyze interface for edge dofs */ 838 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 839 pcbddc->mat_graph->twodim = PETSC_FALSE; 840 841 /* Get coarse edges in the edge space */ 842 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 843 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 844 845 if (fl2g) { 846 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 847 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 848 for (i=0;i<nee;i++) { 849 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 850 } 851 } else { 852 eedges = alleedges; 853 primals = allprimals; 854 } 855 856 /* Mark fine edge dofs with their coarse edge id */ 857 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 858 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 859 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 860 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 861 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 862 if (print) { 863 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 864 ierr = ISView(primals,NULL);CHKERRQ(ierr); 865 } 866 867 maxsize = 0; 868 for (i=0;i<nee;i++) { 869 PetscInt size,mark = i+1; 870 871 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 872 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 for (j=0;j<size;j++) marks[idxs[j]] = mark; 874 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 maxsize = PetscMax(maxsize,size); 876 } 877 878 /* Find coarse edge endpoints */ 879 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 880 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 881 for (i=0;i<nee;i++) { 882 PetscInt mark = i+1,size; 883 884 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 885 if (!size && nedfieldlocal) continue; 886 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 887 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 888 if (print) { 889 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 890 ISView(eedges[i],NULL); 891 } 892 for (j=0;j<size;j++) { 893 PetscInt k, ee = idxs[j]; 894 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 895 for (k=ii[ee];k<ii[ee+1];k++) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 897 if (PetscBTLookup(btv,jj[k])) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 899 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 900 PetscInt k2; 901 PetscBool corner = PETSC_FALSE; 902 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 903 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 904 /* it's a corner if either is connected with an edge dof belonging to a different cc or 905 if the edge dof lie on the natural part of the boundary */ 906 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 907 corner = PETSC_TRUE; 908 break; 909 } 910 } 911 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 912 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 913 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 914 } else { 915 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 916 } 917 } 918 } 919 } 920 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 921 } 922 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 923 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 924 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 925 926 /* Reset marked primal dofs */ 927 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 928 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 929 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 930 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 931 932 /* Now use the initial lG */ 933 ierr = MatDestroy(&lG);CHKERRQ(ierr); 934 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 935 lG = lGinit; 936 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 937 938 /* Compute extended cols indices */ 939 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 940 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 941 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 942 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 943 i *= maxsize; 944 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 945 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 946 eerr = PETSC_FALSE; 947 for (i=0;i<nee;i++) { 948 PetscInt size,found = 0; 949 950 cum = 0; 951 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 952 if (!size && nedfieldlocal) continue; 953 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 954 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 955 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 956 for (j=0;j<size;j++) { 957 PetscInt k,ee = idxs[j]; 958 for (k=ii[ee];k<ii[ee+1];k++) { 959 PetscInt vv = jj[k]; 960 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 961 else if (!PetscBTLookupSet(btvc,vv)) found++; 962 } 963 } 964 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 965 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 966 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 967 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 968 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 969 /* it may happen that endpoints are not defined at this point 970 if it is the case, mark this edge for a second pass */ 971 if (cum != size -1 || found != 2) { 972 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 973 if (print) { 974 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 975 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 976 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 977 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 978 } 979 eerr = PETSC_TRUE; 980 } 981 } 982 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 983 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 984 if (done) { 985 PetscInt *newprimals; 986 987 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 988 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 989 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 991 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 993 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 994 for (i=0;i<nee;i++) { 995 PetscBool has_candidates = PETSC_FALSE; 996 if (PetscBTLookup(bter,i)) { 997 PetscInt size,mark = i+1; 998 999 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1000 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1002 for (j=0;j<size;j++) { 1003 PetscInt k,ee = idxs[j]; 1004 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1005 for (k=ii[ee];k<ii[ee+1];k++) { 1006 /* set all candidates located on the edge as corners */ 1007 if (PetscBTLookup(btvcand,jj[k])) { 1008 PetscInt k2,vv = jj[k]; 1009 has_candidates = PETSC_TRUE; 1010 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1011 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1012 /* set all edge dofs connected to candidate as primals */ 1013 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1014 if (marks[jjt[k2]] == mark) { 1015 PetscInt k3,ee2 = jjt[k2]; 1016 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1017 newprimals[cum++] = ee2; 1018 /* finally set the new corners */ 1019 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1021 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1022 } 1023 } 1024 } 1025 } else { 1026 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1027 } 1028 } 1029 } 1030 if (!has_candidates) { /* circular edge */ 1031 PetscInt k, ee = idxs[0],*tmarks; 1032 1033 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1034 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1035 for (k=ii[ee];k<ii[ee+1];k++) { 1036 PetscInt k2; 1037 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1038 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1039 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1040 } 1041 for (j=0;j<size;j++) { 1042 if (tmarks[idxs[j]] > 1) { 1043 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1044 newprimals[cum++] = idxs[j]; 1045 } 1046 } 1047 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1048 } 1049 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1050 } 1051 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(extcols);CHKERRQ(ierr); 1054 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1055 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1056 if (fl2g) { 1057 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1058 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1059 for (i=0;i<nee;i++) { 1060 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1061 } 1062 ierr = PetscFree(eedges);CHKERRQ(ierr); 1063 } 1064 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1065 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1066 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1067 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1068 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1069 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1070 pcbddc->mat_graph->twodim = PETSC_FALSE; 1071 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1072 if (fl2g) { 1073 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1074 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1075 for (i=0;i<nee;i++) { 1076 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1077 } 1078 } else { 1079 eedges = alleedges; 1080 primals = allprimals; 1081 } 1082 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1083 1084 /* Mark again */ 1085 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1086 for (i=0;i<nee;i++) { 1087 PetscInt size,mark = i+1; 1088 1089 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1090 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1092 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 } 1094 if (print) { 1095 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1096 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1097 } 1098 1099 /* Recompute extended cols */ 1100 eerr = PETSC_FALSE; 1101 for (i=0;i<nee;i++) { 1102 PetscInt size; 1103 1104 cum = 0; 1105 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1106 if (!size && nedfieldlocal) continue; 1107 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1108 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1109 for (j=0;j<size;j++) { 1110 PetscInt k,ee = idxs[j]; 1111 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1112 } 1113 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1114 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1115 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1116 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1117 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1118 if (cum != size -1) { 1119 if (print) { 1120 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1124 } 1125 eerr = PETSC_TRUE; 1126 } 1127 } 1128 } 1129 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1130 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1131 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1132 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1133 /* an error should not occur at this point */ 1134 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1135 1136 /* Check the number of endpoints */ 1137 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1139 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1140 for (i=0;i<nee;i++) { 1141 PetscInt size, found = 0, gc[2]; 1142 1143 /* init with defaults */ 1144 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1145 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1146 if (!size && nedfieldlocal) continue; 1147 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1148 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1149 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1150 for (j=0;j<size;j++) { 1151 PetscInt k,ee = idxs[j]; 1152 for (k=ii[ee];k<ii[ee+1];k++) { 1153 PetscInt vv = jj[k]; 1154 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1155 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1156 corners[i*2+found++] = vv; 1157 } 1158 } 1159 } 1160 if (found != 2) { 1161 PetscInt e; 1162 if (fl2g) { 1163 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1164 } else { 1165 e = idxs[0]; 1166 } 1167 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1168 } 1169 1170 /* get primal dof index on this coarse edge */ 1171 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1172 if (gc[0] > gc[1]) { 1173 PetscInt swap = corners[2*i]; 1174 corners[2*i] = corners[2*i+1]; 1175 corners[2*i+1] = swap; 1176 } 1177 cedges[i] = idxs[size-1]; 1178 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1179 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1180 } 1181 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1182 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1183 1184 #if defined(PETSC_USE_DEBUG) 1185 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1186 not interfere with neighbouring coarse edges */ 1187 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1188 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1189 for (i=0;i<nv;i++) { 1190 PetscInt emax = 0,eemax = 0; 1191 1192 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1193 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1194 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1195 for (j=1;j<nee+1;j++) { 1196 if (emax < emarks[j]) { 1197 emax = emarks[j]; 1198 eemax = j; 1199 } 1200 } 1201 /* not relevant for edges */ 1202 if (!eemax) continue; 1203 1204 for (j=ii[i];j<ii[i+1];j++) { 1205 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1206 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1207 } 1208 } 1209 } 1210 ierr = PetscFree(emarks);CHKERRQ(ierr); 1211 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1212 #endif 1213 1214 /* Compute extended rows indices for edge blocks of the change of basis */ 1215 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1217 extmem *= maxsize; 1218 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1219 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1220 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1221 for (i=0;i<nv;i++) { 1222 PetscInt mark = 0,size,start; 1223 1224 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1225 for (j=ii[i];j<ii[i+1];j++) 1226 if (marks[jj[j]] && !mark) 1227 mark = marks[jj[j]]; 1228 1229 /* not relevant */ 1230 if (!mark) continue; 1231 1232 /* import extended row */ 1233 mark--; 1234 start = mark*extmem+extrowcum[mark]; 1235 size = ii[i+1]-ii[i]; 1236 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1237 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1238 extrowcum[mark] += size; 1239 } 1240 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1241 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1242 ierr = PetscFree(marks);CHKERRQ(ierr); 1243 1244 /* Compress extrows */ 1245 cum = 0; 1246 for (i=0;i<nee;i++) { 1247 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1248 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1249 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1250 cum = PetscMax(cum,size); 1251 } 1252 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1253 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1254 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1255 1256 /* Workspace for lapack inner calls and VecSetValues */ 1257 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1258 1259 /* Create change of basis matrix (preallocation can be improved) */ 1260 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1261 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1262 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1263 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1264 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1265 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1266 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1267 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1268 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1269 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1270 1271 /* Defaults to identity */ 1272 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1273 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1274 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1275 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1276 1277 /* Create discrete gradient for the coarser level if needed */ 1278 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1279 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1280 if (pcbddc->current_level < pcbddc->max_levels) { 1281 ISLocalToGlobalMapping cel2g,cvl2g; 1282 IS wis,gwis; 1283 PetscInt cnv,cne; 1284 1285 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1286 if (fl2g) { 1287 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1288 } else { 1289 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1290 pcbddc->nedclocal = wis; 1291 } 1292 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1293 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1294 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1295 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1296 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1298 1299 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1300 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1302 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1303 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1304 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1306 1307 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1308 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1309 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1310 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1311 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1312 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1313 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1314 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1317 1318 #if defined(PRINT_GDET) 1319 inc = 0; 1320 lev = pcbddc->current_level; 1321 #endif 1322 1323 /* Insert values in the change of basis matrix */ 1324 for (i=0;i<nee;i++) { 1325 Mat Gins = NULL, GKins = NULL; 1326 IS cornersis = NULL; 1327 PetscScalar cvals[2]; 1328 1329 if (pcbddc->nedcG) { 1330 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1331 } 1332 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1333 if (Gins && GKins) { 1334 PetscScalar *data; 1335 const PetscInt *rows,*cols; 1336 PetscInt nrh,nch,nrc,ncc; 1337 1338 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1339 /* H1 */ 1340 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1341 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1342 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1344 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1346 /* complement */ 1347 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1348 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1349 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1350 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1351 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1352 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1353 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1354 1355 /* coarse discrete gradient */ 1356 if (pcbddc->nedcG) { 1357 PetscInt cols[2]; 1358 1359 cols[0] = 2*i; 1360 cols[1] = 2*i+1; 1361 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1362 } 1363 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1364 } 1365 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1366 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1367 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1368 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1369 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1370 } 1371 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1372 1373 /* Start assembling */ 1374 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 if (pcbddc->nedcG) { 1376 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 } 1378 1379 /* Free */ 1380 if (fl2g) { 1381 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1382 for (i=0;i<nee;i++) { 1383 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1384 } 1385 ierr = PetscFree(eedges);CHKERRQ(ierr); 1386 } 1387 1388 /* hack mat_graph with primal dofs on the coarse edges */ 1389 { 1390 PCBDDCGraph graph = pcbddc->mat_graph; 1391 PetscInt *oqueue = graph->queue; 1392 PetscInt *ocptr = graph->cptr; 1393 PetscInt ncc,*idxs; 1394 1395 /* find first primal edge */ 1396 if (pcbddc->nedclocal) { 1397 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1398 } else { 1399 if (fl2g) { 1400 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1401 } 1402 idxs = cedges; 1403 } 1404 cum = 0; 1405 while (cum < nee && cedges[cum] < 0) cum++; 1406 1407 /* adapt connected components */ 1408 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1409 graph->cptr[0] = 0; 1410 for (i=0,ncc=0;i<graph->ncc;i++) { 1411 PetscInt lc = ocptr[i+1]-ocptr[i]; 1412 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1413 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1414 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1415 ncc++; 1416 lc--; 1417 cum++; 1418 while (cum < nee && cedges[cum] < 0) cum++; 1419 } 1420 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1421 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1422 ncc++; 1423 } 1424 graph->ncc = ncc; 1425 if (pcbddc->nedclocal) { 1426 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1429 } 1430 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1431 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1432 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1433 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1434 1435 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1436 ierr = PetscFree(extrow);CHKERRQ(ierr); 1437 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1438 ierr = PetscFree(corners);CHKERRQ(ierr); 1439 ierr = PetscFree(cedges);CHKERRQ(ierr); 1440 ierr = PetscFree(extrows);CHKERRQ(ierr); 1441 ierr = PetscFree(extcols);CHKERRQ(ierr); 1442 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1443 1444 /* Complete assembling */ 1445 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 if (pcbddc->nedcG) { 1447 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 #if 0 1449 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1450 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1451 #endif 1452 } 1453 1454 /* set change of basis */ 1455 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1456 ierr = MatDestroy(&T);CHKERRQ(ierr); 1457 1458 PetscFunctionReturn(0); 1459 } 1460 1461 /* the near-null space of BDDC carries information on quadrature weights, 1462 and these can be collinear -> so cheat with MatNullSpaceCreate 1463 and create a suitable set of basis vectors first */ 1464 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1465 { 1466 PetscErrorCode ierr; 1467 PetscInt i; 1468 1469 PetscFunctionBegin; 1470 for (i=0;i<nvecs;i++) { 1471 PetscInt first,last; 1472 1473 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1474 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1475 if (i>=first && i < last) { 1476 PetscScalar *data; 1477 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1478 if (!has_const) { 1479 data[i-first] = 1.; 1480 } else { 1481 data[2*i-first] = 1./PetscSqrtReal(2.); 1482 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1483 } 1484 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1485 } 1486 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1487 } 1488 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1489 for (i=0;i<nvecs;i++) { /* reset vectors */ 1490 PetscInt first,last; 1491 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1492 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1493 if (i>=first && i < last) { 1494 PetscScalar *data; 1495 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1496 if (!has_const) { 1497 data[i-first] = 0.; 1498 } else { 1499 data[2*i-first] = 0.; 1500 data[2*i-first+1] = 0.; 1501 } 1502 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1503 } 1504 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1505 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } 1509 1510 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1511 { 1512 Mat loc_divudotp; 1513 Vec p,v,vins,quad_vec,*quad_vecs; 1514 ISLocalToGlobalMapping map; 1515 IS *faces,*edges; 1516 PetscScalar *vals; 1517 const PetscScalar *array; 1518 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1519 PetscMPIInt rank; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1524 if (graph->twodim) { 1525 lmaxneighs = 2; 1526 } else { 1527 lmaxneighs = 1; 1528 for (i=0;i<ne;i++) { 1529 const PetscInt *idxs; 1530 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1532 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 } 1534 lmaxneighs++; /* graph count does not include self */ 1535 } 1536 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1537 maxsize = 0; 1538 for (i=0;i<ne;i++) { 1539 PetscInt nn; 1540 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1541 maxsize = PetscMax(maxsize,nn); 1542 } 1543 for (i=0;i<nf;i++) { 1544 PetscInt nn; 1545 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1546 maxsize = PetscMax(maxsize,nn); 1547 } 1548 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1549 /* create vectors to hold quadrature weights */ 1550 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1551 if (!transpose) { 1552 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1553 } else { 1554 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1555 } 1556 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1557 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1558 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1559 for (i=0;i<maxneighs;i++) { 1560 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1561 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1562 } 1563 1564 /* compute local quad vec */ 1565 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1566 if (!transpose) { 1567 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1568 } else { 1569 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1570 } 1571 ierr = VecSet(p,1.);CHKERRQ(ierr); 1572 if (!transpose) { 1573 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } else { 1575 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } 1577 if (vl2l) { 1578 ierr = VecGetSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1579 } else { 1580 vins = v; 1581 } 1582 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1583 ierr = VecDestroy(&p);CHKERRQ(ierr); 1584 1585 /* insert in global quadrature vecs */ 1586 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1587 for (i=0;i<nf;i++) { 1588 const PetscInt *idxs; 1589 PetscInt idx,nn,j; 1590 1591 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1592 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1593 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1594 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1595 idx = -(idx+1); 1596 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1597 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1598 } 1599 for (i=0;i<ne;i++) { 1600 const PetscInt *idxs; 1601 PetscInt idx,nn,j; 1602 1603 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1604 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1605 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1606 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1607 idx = -(idx+1); 1608 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1609 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1610 } 1611 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1612 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1613 if (vl2l) { 1614 ierr = VecRestoreSubVector(v,vl2l,&vins);CHKERRQ(ierr); 1615 } 1616 ierr = VecDestroy(&v);CHKERRQ(ierr); 1617 ierr = PetscFree(vals);CHKERRQ(ierr); 1618 1619 /* assemble near null space */ 1620 for (i=0;i<maxneighs;i++) { 1621 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1622 } 1623 for (i=0;i<maxneighs;i++) { 1624 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1625 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1626 } 1627 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1628 PetscFunctionReturn(0); 1629 } 1630 1631 1632 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1633 { 1634 PetscErrorCode ierr; 1635 Vec local,global; 1636 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1637 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1638 PetscBool monolithic = PETSC_FALSE; 1639 1640 PetscFunctionBegin; 1641 /* need to convert from global to local topology information and remove references to information in global ordering */ 1642 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1643 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1644 if (pcbddc->user_provided_isfordofs) { 1645 if (pcbddc->n_ISForDofs) { 1646 PetscInt i; 1647 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1648 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1649 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1650 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1651 } 1652 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1653 pcbddc->n_ISForDofs = 0; 1654 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1655 } 1656 } else { 1657 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1658 DM dm; 1659 1660 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1661 if (!dm) { 1662 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1663 } 1664 if (dm) { 1665 IS *fields; 1666 PetscInt nf,i; 1667 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1668 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1669 for (i=0;i<nf;i++) { 1670 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1671 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1672 } 1673 ierr = PetscFree(fields);CHKERRQ(ierr); 1674 pcbddc->n_ISForDofsLocal = nf; 1675 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1676 PetscContainer c; 1677 1678 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1679 if (c) { 1680 MatISLocalFields lf; 1681 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1682 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1683 } else { /* fallback, create the default fields if bs > 1 */ 1684 PetscInt i, n = matis->A->rmap->n; 1685 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1686 if (i > 1) { 1687 pcbddc->n_ISForDofsLocal = i; 1688 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1689 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1690 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1691 } 1692 } 1693 } 1694 } 1695 } else { 1696 PetscInt i; 1697 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1698 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 1703 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1704 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1705 } else if (pcbddc->DirichletBoundariesLocal) { 1706 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1707 } 1708 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1709 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1710 } else if (pcbddc->NeumannBoundariesLocal) { 1711 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1712 } 1713 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1714 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1715 } 1716 ierr = VecDestroy(&global);CHKERRQ(ierr); 1717 ierr = VecDestroy(&local);CHKERRQ(ierr); 1718 1719 PetscFunctionReturn(0); 1720 } 1721 1722 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1723 { 1724 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1725 PetscErrorCode ierr; 1726 IS nis; 1727 const PetscInt *idxs; 1728 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1729 PetscBool *ld; 1730 1731 PetscFunctionBegin; 1732 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1733 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1734 if (mop == MPI_LAND) { 1735 /* init rootdata with true */ 1736 ld = (PetscBool*) matis->sf_rootdata; 1737 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1738 } else { 1739 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1740 } 1741 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1742 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1743 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1744 ld = (PetscBool*) matis->sf_leafdata; 1745 for (i=0;i<nd;i++) 1746 if (-1 < idxs[i] && idxs[i] < n) 1747 ld[idxs[i]] = PETSC_TRUE; 1748 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1749 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1750 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1751 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1752 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1753 if (mop == MPI_LAND) { 1754 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1755 } else { 1756 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1757 } 1758 for (i=0,nnd=0;i<n;i++) 1759 if (ld[i]) 1760 nidxs[nnd++] = i; 1761 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1762 ierr = ISDestroy(is);CHKERRQ(ierr); 1763 *is = nis; 1764 PetscFunctionReturn(0); 1765 } 1766 1767 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1768 { 1769 PC_IS *pcis = (PC_IS*)(pc->data); 1770 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1771 PetscErrorCode ierr; 1772 1773 PetscFunctionBegin; 1774 if (!pcbddc->benign_have_null) { 1775 PetscFunctionReturn(0); 1776 } 1777 if (pcbddc->ChangeOfBasisMatrix) { 1778 Vec swap; 1779 1780 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1781 swap = pcbddc->work_change; 1782 pcbddc->work_change = r; 1783 r = swap; 1784 } 1785 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1786 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1787 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1788 ierr = VecSet(z,0.);CHKERRQ(ierr); 1789 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1790 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1791 if (pcbddc->ChangeOfBasisMatrix) { 1792 pcbddc->work_change = r; 1793 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1794 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1795 } 1796 PetscFunctionReturn(0); 1797 } 1798 1799 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1800 { 1801 PCBDDCBenignMatMult_ctx ctx; 1802 PetscErrorCode ierr; 1803 PetscBool apply_right,apply_left,reset_x; 1804 1805 PetscFunctionBegin; 1806 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1807 if (transpose) { 1808 apply_right = ctx->apply_left; 1809 apply_left = ctx->apply_right; 1810 } else { 1811 apply_right = ctx->apply_right; 1812 apply_left = ctx->apply_left; 1813 } 1814 reset_x = PETSC_FALSE; 1815 if (apply_right) { 1816 const PetscScalar *ax; 1817 PetscInt nl,i; 1818 1819 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1820 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1821 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1822 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1823 for (i=0;i<ctx->benign_n;i++) { 1824 PetscScalar sum,val; 1825 const PetscInt *idxs; 1826 PetscInt nz,j; 1827 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1828 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1829 sum = 0.; 1830 if (ctx->apply_p0) { 1831 val = ctx->work[idxs[nz-1]]; 1832 for (j=0;j<nz-1;j++) { 1833 sum += ctx->work[idxs[j]]; 1834 ctx->work[idxs[j]] += val; 1835 } 1836 } else { 1837 for (j=0;j<nz-1;j++) { 1838 sum += ctx->work[idxs[j]]; 1839 } 1840 } 1841 ctx->work[idxs[nz-1]] -= sum; 1842 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1843 } 1844 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1845 reset_x = PETSC_TRUE; 1846 } 1847 if (transpose) { 1848 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1849 } else { 1850 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1851 } 1852 if (reset_x) { 1853 ierr = VecResetArray(x);CHKERRQ(ierr); 1854 } 1855 if (apply_left) { 1856 PetscScalar *ay; 1857 PetscInt i; 1858 1859 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1860 for (i=0;i<ctx->benign_n;i++) { 1861 PetscScalar sum,val; 1862 const PetscInt *idxs; 1863 PetscInt nz,j; 1864 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1865 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1866 val = -ay[idxs[nz-1]]; 1867 if (ctx->apply_p0) { 1868 sum = 0.; 1869 for (j=0;j<nz-1;j++) { 1870 sum += ay[idxs[j]]; 1871 ay[idxs[j]] += val; 1872 } 1873 ay[idxs[nz-1]] += sum; 1874 } else { 1875 for (j=0;j<nz-1;j++) { 1876 ay[idxs[j]] += val; 1877 } 1878 ay[idxs[nz-1]] = 0.; 1879 } 1880 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1881 } 1882 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1888 { 1889 PetscErrorCode ierr; 1890 1891 PetscFunctionBegin; 1892 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1893 PetscFunctionReturn(0); 1894 } 1895 1896 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1897 { 1898 PetscErrorCode ierr; 1899 1900 PetscFunctionBegin; 1901 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1902 PetscFunctionReturn(0); 1903 } 1904 1905 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1906 { 1907 PC_IS *pcis = (PC_IS*)pc->data; 1908 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1909 PCBDDCBenignMatMult_ctx ctx; 1910 PetscErrorCode ierr; 1911 1912 PetscFunctionBegin; 1913 if (!restore) { 1914 Mat A_IB,A_BI; 1915 PetscScalar *work; 1916 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1917 1918 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1919 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1920 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1921 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1922 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1923 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1924 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1925 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1926 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1927 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1928 ctx->apply_left = PETSC_TRUE; 1929 ctx->apply_right = PETSC_FALSE; 1930 ctx->apply_p0 = PETSC_FALSE; 1931 ctx->benign_n = pcbddc->benign_n; 1932 if (reuse) { 1933 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1934 ctx->free = PETSC_FALSE; 1935 } else { /* TODO: could be optimized for successive solves */ 1936 ISLocalToGlobalMapping N_to_D; 1937 PetscInt i; 1938 1939 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1940 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1941 for (i=0;i<pcbddc->benign_n;i++) { 1942 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1943 } 1944 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1945 ctx->free = PETSC_TRUE; 1946 } 1947 ctx->A = pcis->A_IB; 1948 ctx->work = work; 1949 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1950 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1951 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1952 pcis->A_IB = A_IB; 1953 1954 /* A_BI as A_IB^T */ 1955 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1956 pcbddc->benign_original_mat = pcis->A_BI; 1957 pcis->A_BI = A_BI; 1958 } else { 1959 if (!pcbddc->benign_original_mat) { 1960 PetscFunctionReturn(0); 1961 } 1962 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1963 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1964 pcis->A_IB = ctx->A; 1965 ctx->A = NULL; 1966 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1967 pcis->A_BI = pcbddc->benign_original_mat; 1968 pcbddc->benign_original_mat = NULL; 1969 if (ctx->free) { 1970 PetscInt i; 1971 for (i=0;i<ctx->benign_n;i++) { 1972 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1973 } 1974 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1975 } 1976 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1977 ierr = PetscFree(ctx);CHKERRQ(ierr); 1978 } 1979 PetscFunctionReturn(0); 1980 } 1981 1982 /* used just in bddc debug mode */ 1983 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1984 { 1985 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1986 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1987 Mat An; 1988 PetscErrorCode ierr; 1989 1990 PetscFunctionBegin; 1991 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1992 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 1993 if (is1) { 1994 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 1995 ierr = MatDestroy(&An);CHKERRQ(ierr); 1996 } else { 1997 *B = An; 1998 } 1999 PetscFunctionReturn(0); 2000 } 2001 2002 /* TODO: add reuse flag */ 2003 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2004 { 2005 Mat Bt; 2006 PetscScalar *a,*bdata; 2007 const PetscInt *ii,*ij; 2008 PetscInt m,n,i,nnz,*bii,*bij; 2009 PetscBool flg_row; 2010 PetscErrorCode ierr; 2011 2012 PetscFunctionBegin; 2013 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2014 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2015 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2016 nnz = n; 2017 for (i=0;i<ii[n];i++) { 2018 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2019 } 2020 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2021 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2022 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2023 nnz = 0; 2024 bii[0] = 0; 2025 for (i=0;i<n;i++) { 2026 PetscInt j; 2027 for (j=ii[i];j<ii[i+1];j++) { 2028 PetscScalar entry = a[j]; 2029 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2030 bij[nnz] = ij[j]; 2031 bdata[nnz] = entry; 2032 nnz++; 2033 } 2034 } 2035 bii[i+1] = nnz; 2036 } 2037 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2038 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2039 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2040 { 2041 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2042 b->free_a = PETSC_TRUE; 2043 b->free_ij = PETSC_TRUE; 2044 } 2045 *B = Bt; 2046 PetscFunctionReturn(0); 2047 } 2048 2049 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2050 { 2051 Mat B = NULL; 2052 DM dm; 2053 IS is_dummy,*cc_n; 2054 ISLocalToGlobalMapping l2gmap_dummy; 2055 PCBDDCGraph graph; 2056 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2057 PetscInt i,n; 2058 PetscInt *xadj,*adjncy; 2059 PetscBool isplex = PETSC_FALSE; 2060 PetscErrorCode ierr; 2061 2062 PetscFunctionBegin; 2063 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2064 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2065 if (!dm) { 2066 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2067 } 2068 if (dm) { 2069 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2070 } 2071 if (isplex) { /* this code has been modified from plexpartition.c */ 2072 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2073 PetscInt *adj = NULL; 2074 IS cellNumbering; 2075 const PetscInt *cellNum; 2076 PetscBool useCone, useClosure; 2077 PetscSection section; 2078 PetscSegBuffer adjBuffer; 2079 PetscSF sfPoint; 2080 PetscErrorCode ierr; 2081 2082 PetscFunctionBegin; 2083 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2084 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2085 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2086 /* Build adjacency graph via a section/segbuffer */ 2087 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2088 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2089 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2090 /* Always use FVM adjacency to create partitioner graph */ 2091 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2092 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2093 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2094 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2095 ierr = DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, &cellNumbering);CHKERRQ(ierr); 2096 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2097 for (n = 0, p = pStart; p < pEnd; p++) { 2098 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2099 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2100 adjSize = PETSC_DETERMINE; 2101 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2102 for (a = 0; a < adjSize; ++a) { 2103 const PetscInt point = adj[a]; 2104 if (point != p && pStart <= point && point < pEnd) { 2105 PetscInt *PETSC_RESTRICT pBuf; 2106 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2107 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2108 *pBuf = point; 2109 } 2110 } 2111 n++; 2112 } 2113 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2114 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2115 /* Derive CSR graph from section/segbuffer */ 2116 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2117 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2118 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2119 for (idx = 0, p = pStart; p < pEnd; p++) { 2120 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2121 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2122 } 2123 xadj[n] = size; 2124 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2125 /* Clean up */ 2126 ierr = ISDestroy(&cellNumbering);CHKERRQ(ierr); 2127 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2128 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2129 ierr = PetscFree(adj);CHKERRQ(ierr); 2130 graph->xadj = xadj; 2131 graph->adjncy = adjncy; 2132 } else { 2133 Mat A; 2134 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2135 2136 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2137 if (!A->rmap->N || !A->cmap->N) { 2138 *ncc = 0; 2139 *cc = NULL; 2140 PetscFunctionReturn(0); 2141 } 2142 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2143 if (!isseqaij && filter) { 2144 PetscBool isseqdense; 2145 2146 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2147 if (!isseqdense) { 2148 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2149 } else { /* TODO: rectangular case and LDA */ 2150 PetscScalar *array; 2151 PetscReal chop=1.e-6; 2152 2153 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2154 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2155 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2156 for (i=0;i<n;i++) { 2157 PetscInt j; 2158 for (j=i+1;j<n;j++) { 2159 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2160 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2161 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2162 } 2163 } 2164 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2165 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2166 } 2167 } else { 2168 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2169 B = A; 2170 } 2171 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2172 2173 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2174 if (filter) { 2175 PetscScalar *data; 2176 PetscInt j,cum; 2177 2178 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2179 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2180 cum = 0; 2181 for (i=0;i<n;i++) { 2182 PetscInt t; 2183 2184 for (j=xadj[i];j<xadj[i+1];j++) { 2185 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2186 continue; 2187 } 2188 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2189 } 2190 t = xadj_filtered[i]; 2191 xadj_filtered[i] = cum; 2192 cum += t; 2193 } 2194 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2195 graph->xadj = xadj_filtered; 2196 graph->adjncy = adjncy_filtered; 2197 } else { 2198 graph->xadj = xadj; 2199 graph->adjncy = adjncy; 2200 } 2201 } 2202 /* compute local connected components using PCBDDCGraph */ 2203 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2204 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2205 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2206 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2207 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2208 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2209 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2210 2211 /* partial clean up */ 2212 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2213 if (B) { 2214 PetscBool flg_row; 2215 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2216 ierr = MatDestroy(&B);CHKERRQ(ierr); 2217 } 2218 if (isplex) { 2219 ierr = PetscFree(xadj);CHKERRQ(ierr); 2220 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2221 } 2222 2223 /* get back data */ 2224 if (isplex) { 2225 if (ncc) *ncc = graph->ncc; 2226 if (cc || primalv) { 2227 Mat A; 2228 PetscBT btv,btvt; 2229 PetscSection subSection; 2230 PetscInt *ids,cum,cump,*cids,*pids; 2231 2232 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2233 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2234 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2235 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2236 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2237 2238 cids[0] = 0; 2239 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2240 PetscInt j; 2241 2242 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2243 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2244 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2245 2246 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2247 for (k = 0; k < 2*size; k += 2) { 2248 PetscInt s, p = closure[k], off, dof, cdof; 2249 2250 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2251 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2252 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2253 for (s = 0; s < dof-cdof; s++) { 2254 if (PetscBTLookupSet(btvt,off+s)) continue; 2255 if (!PetscBTLookup(btv,off+s)) { 2256 ids[cum++] = off+s; 2257 } else { /* cross-vertex */ 2258 pids[cump++] = off+s; 2259 } 2260 } 2261 } 2262 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2263 } 2264 cids[i+1] = cum; 2265 /* mark dofs as already assigned */ 2266 for (j = cids[i]; j < cids[i+1]; j++) { 2267 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2268 } 2269 } 2270 if (cc) { 2271 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2272 for (i = 0; i < graph->ncc; i++) { 2273 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2274 } 2275 *cc = cc_n; 2276 } 2277 if (primalv) { 2278 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2279 } 2280 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2281 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2282 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2283 } 2284 } else { 2285 if (ncc) *ncc = graph->ncc; 2286 if (cc) { 2287 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2288 for (i=0;i<graph->ncc;i++) { 2289 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2290 } 2291 *cc = cc_n; 2292 } 2293 if (primalv) *primalv = NULL; 2294 } 2295 /* clean up graph */ 2296 graph->xadj = 0; 2297 graph->adjncy = 0; 2298 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2299 PetscFunctionReturn(0); 2300 } 2301 2302 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2303 { 2304 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2305 PC_IS* pcis = (PC_IS*)(pc->data); 2306 IS dirIS = NULL; 2307 PetscInt i; 2308 PetscErrorCode ierr; 2309 2310 PetscFunctionBegin; 2311 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2312 if (zerodiag) { 2313 Mat A; 2314 Vec vec3_N; 2315 PetscScalar *vals; 2316 const PetscInt *idxs; 2317 PetscInt nz,*count; 2318 2319 /* p0 */ 2320 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2321 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2322 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2323 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2324 for (i=0;i<nz;i++) vals[i] = 1.; 2325 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2326 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2327 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2328 /* v_I */ 2329 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2330 for (i=0;i<nz;i++) vals[i] = 0.; 2331 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2332 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2333 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2334 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2335 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2336 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2337 if (dirIS) { 2338 PetscInt n; 2339 2340 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2341 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2342 for (i=0;i<n;i++) vals[i] = 0.; 2343 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2344 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2345 } 2346 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2347 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2348 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2349 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2350 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2351 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2352 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2353 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2354 ierr = PetscFree(vals);CHKERRQ(ierr); 2355 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2356 2357 /* there should not be any pressure dofs lying on the interface */ 2358 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2359 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2360 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2361 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2362 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2363 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2364 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2365 ierr = PetscFree(count);CHKERRQ(ierr); 2366 } 2367 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2368 2369 /* check PCBDDCBenignGetOrSetP0 */ 2370 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2371 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2372 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2373 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2374 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2375 for (i=0;i<pcbddc->benign_n;i++) { 2376 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2377 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2378 } 2379 PetscFunctionReturn(0); 2380 } 2381 2382 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2383 { 2384 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2385 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2386 PetscInt nz,n; 2387 PetscInt *interior_dofs,n_interior_dofs,nneu; 2388 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2389 PetscErrorCode ierr; 2390 2391 PetscFunctionBegin; 2392 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2393 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2394 for (n=0;n<pcbddc->benign_n;n++) { 2395 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2396 } 2397 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2398 pcbddc->benign_n = 0; 2399 2400 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2401 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2402 Checks if all the pressure dofs in each subdomain have a zero diagonal 2403 If not, a change of basis on pressures is not needed 2404 since the local Schur complements are already SPD 2405 */ 2406 has_null_pressures = PETSC_TRUE; 2407 have_null = PETSC_TRUE; 2408 if (pcbddc->n_ISForDofsLocal) { 2409 IS iP = NULL; 2410 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2411 2412 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2413 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2414 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2415 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2416 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2417 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2418 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2419 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2421 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2422 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2423 if (iP) { 2424 IS newpressures; 2425 2426 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2427 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2428 pressures = newpressures; 2429 } 2430 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2431 if (!sorted) { 2432 ierr = ISSort(pressures);CHKERRQ(ierr); 2433 } 2434 } else { 2435 pressures = NULL; 2436 } 2437 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2438 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2439 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2440 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2441 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2442 if (!sorted) { 2443 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2444 } 2445 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2446 zerodiag_save = zerodiag; 2447 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2448 if (!nz) { 2449 if (n) have_null = PETSC_FALSE; 2450 has_null_pressures = PETSC_FALSE; 2451 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2452 } 2453 recompute_zerodiag = PETSC_FALSE; 2454 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2455 zerodiag_subs = NULL; 2456 pcbddc->benign_n = 0; 2457 n_interior_dofs = 0; 2458 interior_dofs = NULL; 2459 nneu = 0; 2460 if (pcbddc->NeumannBoundariesLocal) { 2461 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2462 } 2463 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2464 if (checkb) { /* need to compute interior nodes */ 2465 PetscInt n,i,j; 2466 PetscInt n_neigh,*neigh,*n_shared,**shared; 2467 PetscInt *iwork; 2468 2469 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2470 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2471 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2472 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2473 for (i=1;i<n_neigh;i++) 2474 for (j=0;j<n_shared[i];j++) 2475 iwork[shared[i][j]] += 1; 2476 for (i=0;i<n;i++) 2477 if (!iwork[i]) 2478 interior_dofs[n_interior_dofs++] = i; 2479 ierr = PetscFree(iwork);CHKERRQ(ierr); 2480 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2481 } 2482 if (has_null_pressures) { 2483 IS *subs; 2484 PetscInt nsubs,i,j,nl; 2485 const PetscInt *idxs; 2486 PetscScalar *array; 2487 Vec *work; 2488 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2489 2490 subs = pcbddc->local_subs; 2491 nsubs = pcbddc->n_local_subs; 2492 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2493 if (checkb) { 2494 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2495 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2496 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2497 /* work[0] = 1_p */ 2498 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2499 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2500 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2501 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2502 /* work[0] = 1_v */ 2503 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2504 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2505 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2506 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2507 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2508 } 2509 if (nsubs > 1) { 2510 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2511 for (i=0;i<nsubs;i++) { 2512 ISLocalToGlobalMapping l2g; 2513 IS t_zerodiag_subs; 2514 PetscInt nl; 2515 2516 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2517 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2518 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2519 if (nl) { 2520 PetscBool valid = PETSC_TRUE; 2521 2522 if (checkb) { 2523 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2524 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2525 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2526 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2527 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2528 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2529 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2530 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2531 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2532 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2533 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2534 for (j=0;j<n_interior_dofs;j++) { 2535 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2536 valid = PETSC_FALSE; 2537 break; 2538 } 2539 } 2540 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2541 } 2542 if (valid && nneu) { 2543 const PetscInt *idxs; 2544 PetscInt nzb; 2545 2546 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2547 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2548 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2549 if (nzb) valid = PETSC_FALSE; 2550 } 2551 if (valid && pressures) { 2552 IS t_pressure_subs; 2553 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2554 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2555 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2556 } 2557 if (valid) { 2558 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2559 pcbddc->benign_n++; 2560 } else { 2561 recompute_zerodiag = PETSC_TRUE; 2562 } 2563 } 2564 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2565 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2566 } 2567 } else { /* there's just one subdomain (or zero if they have not been detected */ 2568 PetscBool valid = PETSC_TRUE; 2569 2570 if (nneu) valid = PETSC_FALSE; 2571 if (valid && pressures) { 2572 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2573 } 2574 if (valid && checkb) { 2575 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2576 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2577 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2578 for (j=0;j<n_interior_dofs;j++) { 2579 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2580 valid = PETSC_FALSE; 2581 break; 2582 } 2583 } 2584 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2585 } 2586 if (valid) { 2587 pcbddc->benign_n = 1; 2588 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2589 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2590 zerodiag_subs[0] = zerodiag; 2591 } 2592 } 2593 if (checkb) { 2594 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2595 } 2596 } 2597 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2598 2599 if (!pcbddc->benign_n) { 2600 PetscInt n; 2601 2602 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2603 recompute_zerodiag = PETSC_FALSE; 2604 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2605 if (n) { 2606 has_null_pressures = PETSC_FALSE; 2607 have_null = PETSC_FALSE; 2608 } 2609 } 2610 2611 /* final check for null pressures */ 2612 if (zerodiag && pressures) { 2613 PetscInt nz,np; 2614 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2615 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2616 if (nz != np) have_null = PETSC_FALSE; 2617 } 2618 2619 if (recompute_zerodiag) { 2620 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2621 if (pcbddc->benign_n == 1) { 2622 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2623 zerodiag = zerodiag_subs[0]; 2624 } else { 2625 PetscInt i,nzn,*new_idxs; 2626 2627 nzn = 0; 2628 for (i=0;i<pcbddc->benign_n;i++) { 2629 PetscInt ns; 2630 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2631 nzn += ns; 2632 } 2633 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2634 nzn = 0; 2635 for (i=0;i<pcbddc->benign_n;i++) { 2636 PetscInt ns,*idxs; 2637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2638 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2639 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2640 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2641 nzn += ns; 2642 } 2643 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2644 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2645 } 2646 have_null = PETSC_FALSE; 2647 } 2648 2649 /* Prepare matrix to compute no-net-flux */ 2650 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2651 Mat A,loc_divudotp; 2652 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2653 IS row,col,isused = NULL; 2654 PetscInt M,N,n,st,n_isused; 2655 2656 if (pressures) { 2657 isused = pressures; 2658 } else { 2659 isused = zerodiag_save; 2660 } 2661 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2662 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2663 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2664 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2665 n_isused = 0; 2666 if (isused) { 2667 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2668 } 2669 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2670 st = st-n_isused; 2671 if (n) { 2672 const PetscInt *gidxs; 2673 2674 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2675 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2676 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2677 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2678 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2679 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2680 } else { 2681 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2682 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2683 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2684 } 2685 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2686 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2687 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2688 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2689 ierr = ISDestroy(&row);CHKERRQ(ierr); 2690 ierr = ISDestroy(&col);CHKERRQ(ierr); 2691 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2692 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2693 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2694 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2695 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2696 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2697 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2698 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2699 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2700 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2701 } 2702 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2703 2704 /* change of basis and p0 dofs */ 2705 if (has_null_pressures) { 2706 IS zerodiagc; 2707 const PetscInt *idxs,*idxsc; 2708 PetscInt i,s,*nnz; 2709 2710 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2711 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2712 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2713 /* local change of basis for pressures */ 2714 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2715 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2716 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2717 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2718 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2719 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2720 for (i=0;i<pcbddc->benign_n;i++) { 2721 PetscInt nzs,j; 2722 2723 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2724 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2725 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2726 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2727 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2728 } 2729 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2730 ierr = PetscFree(nnz);CHKERRQ(ierr); 2731 /* set identity on velocities */ 2732 for (i=0;i<n-nz;i++) { 2733 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2734 } 2735 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2736 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2737 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2738 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2739 /* set change on pressures */ 2740 for (s=0;s<pcbddc->benign_n;s++) { 2741 PetscScalar *array; 2742 PetscInt nzs; 2743 2744 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2745 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2746 for (i=0;i<nzs-1;i++) { 2747 PetscScalar vals[2]; 2748 PetscInt cols[2]; 2749 2750 cols[0] = idxs[i]; 2751 cols[1] = idxs[nzs-1]; 2752 vals[0] = 1.; 2753 vals[1] = 1.; 2754 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2755 } 2756 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2757 for (i=0;i<nzs-1;i++) array[i] = -1.; 2758 array[nzs-1] = 1.; 2759 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2760 /* store local idxs for p0 */ 2761 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2762 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2763 ierr = PetscFree(array);CHKERRQ(ierr); 2764 } 2765 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2766 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2767 /* project if needed */ 2768 if (pcbddc->benign_change_explicit) { 2769 Mat M; 2770 2771 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2772 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2773 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2774 ierr = MatDestroy(&M);CHKERRQ(ierr); 2775 } 2776 /* store global idxs for p0 */ 2777 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2778 } 2779 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2780 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2781 2782 /* determines if the coarse solver will be singular or not */ 2783 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2784 /* determines if the problem has subdomains with 0 pressure block */ 2785 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2786 *zerodiaglocal = zerodiag; 2787 PetscFunctionReturn(0); 2788 } 2789 2790 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2791 { 2792 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2793 PetscScalar *array; 2794 PetscErrorCode ierr; 2795 2796 PetscFunctionBegin; 2797 if (!pcbddc->benign_sf) { 2798 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2799 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2800 } 2801 if (get) { 2802 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2803 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2804 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2805 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2806 } else { 2807 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2808 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2809 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2810 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2811 } 2812 PetscFunctionReturn(0); 2813 } 2814 2815 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2816 { 2817 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2818 PetscErrorCode ierr; 2819 2820 PetscFunctionBegin; 2821 /* TODO: add error checking 2822 - avoid nested pop (or push) calls. 2823 - cannot push before pop. 2824 - cannot call this if pcbddc->local_mat is NULL 2825 */ 2826 if (!pcbddc->benign_n) { 2827 PetscFunctionReturn(0); 2828 } 2829 if (pop) { 2830 if (pcbddc->benign_change_explicit) { 2831 IS is_p0; 2832 MatReuse reuse; 2833 2834 /* extract B_0 */ 2835 reuse = MAT_INITIAL_MATRIX; 2836 if (pcbddc->benign_B0) { 2837 reuse = MAT_REUSE_MATRIX; 2838 } 2839 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2840 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2841 /* remove rows and cols from local problem */ 2842 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2843 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2844 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2845 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2846 } else { 2847 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2848 PetscScalar *vals; 2849 PetscInt i,n,*idxs_ins; 2850 2851 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2852 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2853 if (!pcbddc->benign_B0) { 2854 PetscInt *nnz; 2855 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2856 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2857 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2858 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2859 for (i=0;i<pcbddc->benign_n;i++) { 2860 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2861 nnz[i] = n - nnz[i]; 2862 } 2863 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2864 ierr = PetscFree(nnz);CHKERRQ(ierr); 2865 } 2866 2867 for (i=0;i<pcbddc->benign_n;i++) { 2868 PetscScalar *array; 2869 PetscInt *idxs,j,nz,cum; 2870 2871 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2872 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2873 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2874 for (j=0;j<nz;j++) vals[j] = 1.; 2875 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2876 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2877 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2878 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2879 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2880 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2881 cum = 0; 2882 for (j=0;j<n;j++) { 2883 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2884 vals[cum] = array[j]; 2885 idxs_ins[cum] = j; 2886 cum++; 2887 } 2888 } 2889 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2890 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2891 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2892 } 2893 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2894 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2895 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2896 } 2897 } else { /* push */ 2898 if (pcbddc->benign_change_explicit) { 2899 PetscInt i; 2900 2901 for (i=0;i<pcbddc->benign_n;i++) { 2902 PetscScalar *B0_vals; 2903 PetscInt *B0_cols,B0_ncol; 2904 2905 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2906 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2907 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2908 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2909 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2910 } 2911 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2912 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2913 } else { 2914 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2915 } 2916 } 2917 PetscFunctionReturn(0); 2918 } 2919 2920 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2921 { 2922 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2923 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2924 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2925 PetscBLASInt *B_iwork,*B_ifail; 2926 PetscScalar *work,lwork; 2927 PetscScalar *St,*S,*eigv; 2928 PetscScalar *Sarray,*Starray; 2929 PetscReal *eigs,thresh; 2930 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2931 PetscBool allocated_S_St; 2932 #if defined(PETSC_USE_COMPLEX) 2933 PetscReal *rwork; 2934 #endif 2935 PetscErrorCode ierr; 2936 2937 PetscFunctionBegin; 2938 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2939 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2940 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2941 2942 if (pcbddc->dbg_flag) { 2943 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2944 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2945 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2946 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2947 } 2948 2949 if (pcbddc->dbg_flag) { 2950 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2951 } 2952 2953 /* max size of subsets */ 2954 mss = 0; 2955 for (i=0;i<sub_schurs->n_subs;i++) { 2956 PetscInt subset_size; 2957 2958 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2959 mss = PetscMax(mss,subset_size); 2960 } 2961 2962 /* min/max and threshold */ 2963 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2964 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2965 nmax = PetscMax(nmin,nmax); 2966 allocated_S_St = PETSC_FALSE; 2967 if (nmin) { 2968 allocated_S_St = PETSC_TRUE; 2969 } 2970 2971 /* allocate lapack workspace */ 2972 cum = cum2 = 0; 2973 maxneigs = 0; 2974 for (i=0;i<sub_schurs->n_subs;i++) { 2975 PetscInt n,subset_size; 2976 2977 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2978 n = PetscMin(subset_size,nmax); 2979 cum += subset_size; 2980 cum2 += subset_size*n; 2981 maxneigs = PetscMax(maxneigs,n); 2982 } 2983 if (mss) { 2984 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2985 PetscBLASInt B_itype = 1; 2986 PetscBLASInt B_N = mss; 2987 PetscReal zero = 0.0; 2988 PetscReal eps = 0.0; /* dlamch? */ 2989 2990 B_lwork = -1; 2991 S = NULL; 2992 St = NULL; 2993 eigs = NULL; 2994 eigv = NULL; 2995 B_iwork = NULL; 2996 B_ifail = NULL; 2997 #if defined(PETSC_USE_COMPLEX) 2998 rwork = NULL; 2999 #endif 3000 thresh = 1.0; 3001 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3002 #if defined(PETSC_USE_COMPLEX) 3003 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3004 #else 3005 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3006 #endif 3007 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3008 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3009 } else { 3010 /* TODO */ 3011 } 3012 } else { 3013 lwork = 0; 3014 } 3015 3016 nv = 0; 3017 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3018 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3019 } 3020 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3021 if (allocated_S_St) { 3022 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3023 } 3024 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3025 #if defined(PETSC_USE_COMPLEX) 3026 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3027 #endif 3028 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3029 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3030 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3031 nv+cum,&pcbddc->adaptive_constraints_idxs, 3032 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3033 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3034 3035 maxneigs = 0; 3036 cum = cumarray = 0; 3037 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3038 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3039 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3040 const PetscInt *idxs; 3041 3042 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3043 for (cum=0;cum<nv;cum++) { 3044 pcbddc->adaptive_constraints_n[cum] = 1; 3045 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3046 pcbddc->adaptive_constraints_data[cum] = 1.0; 3047 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3048 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3049 } 3050 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3051 } 3052 3053 if (mss) { /* multilevel */ 3054 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3055 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3056 } 3057 3058 thresh = pcbddc->adaptive_threshold; 3059 for (i=0;i<sub_schurs->n_subs;i++) { 3060 const PetscInt *idxs; 3061 PetscReal upper,lower; 3062 PetscInt j,subset_size,eigs_start = 0; 3063 PetscBLASInt B_N; 3064 PetscBool same_data = PETSC_FALSE; 3065 3066 if (pcbddc->use_deluxe_scaling) { 3067 upper = PETSC_MAX_REAL; 3068 lower = thresh; 3069 } else { 3070 upper = 1./thresh; 3071 lower = 0.; 3072 } 3073 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3074 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3075 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3076 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3077 if (sub_schurs->is_hermitian) { 3078 PetscInt j,k; 3079 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3080 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3081 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3082 } 3083 for (j=0;j<subset_size;j++) { 3084 for (k=j;k<subset_size;k++) { 3085 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3086 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3087 } 3088 } 3089 } else { 3090 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3091 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3092 } 3093 } else { 3094 S = Sarray + cumarray; 3095 St = Starray + cumarray; 3096 } 3097 /* see if we can save some work */ 3098 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3099 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3100 } 3101 3102 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3103 B_neigs = 0; 3104 } else { 3105 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3106 PetscBLASInt B_itype = 1; 3107 PetscBLASInt B_IL, B_IU; 3108 PetscReal eps = -1.0; /* dlamch? */ 3109 PetscInt nmin_s; 3110 PetscBool compute_range = PETSC_FALSE; 3111 3112 if (pcbddc->dbg_flag) { 3113 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3114 } 3115 3116 compute_range = PETSC_FALSE; 3117 if (thresh > 1.+PETSC_SMALL && !same_data) { 3118 compute_range = PETSC_TRUE; 3119 } 3120 3121 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3122 if (compute_range) { 3123 3124 /* ask for eigenvalues larger than thresh */ 3125 #if defined(PETSC_USE_COMPLEX) 3126 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3127 #else 3128 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3129 #endif 3130 } else if (!same_data) { 3131 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3132 B_IL = 1; 3133 #if defined(PETSC_USE_COMPLEX) 3134 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3135 #else 3136 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3137 #endif 3138 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3139 PetscInt k; 3140 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3141 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3142 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3143 nmin = nmax; 3144 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3145 for (k=0;k<nmax;k++) { 3146 eigs[k] = 1./PETSC_SMALL; 3147 eigv[k*(subset_size+1)] = 1.0; 3148 } 3149 } 3150 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3151 if (B_ierr) { 3152 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3153 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3154 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3155 } 3156 3157 if (B_neigs > nmax) { 3158 if (pcbddc->dbg_flag) { 3159 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3160 } 3161 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3162 B_neigs = nmax; 3163 } 3164 3165 nmin_s = PetscMin(nmin,B_N); 3166 if (B_neigs < nmin_s) { 3167 PetscBLASInt B_neigs2; 3168 3169 if (pcbddc->use_deluxe_scaling) { 3170 B_IL = B_N - nmin_s + 1; 3171 B_IU = B_N - B_neigs; 3172 } else { 3173 B_IL = B_neigs + 1; 3174 B_IU = nmin_s; 3175 } 3176 if (pcbddc->dbg_flag) { 3177 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3178 } 3179 if (sub_schurs->is_hermitian) { 3180 PetscInt j,k; 3181 for (j=0;j<subset_size;j++) { 3182 for (k=j;k<subset_size;k++) { 3183 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3184 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3185 } 3186 } 3187 } else { 3188 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3189 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3190 } 3191 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3192 #if defined(PETSC_USE_COMPLEX) 3193 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3194 #else 3195 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3196 #endif 3197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3198 B_neigs += B_neigs2; 3199 } 3200 if (B_ierr) { 3201 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3202 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3203 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3204 } 3205 if (pcbddc->dbg_flag) { 3206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3207 for (j=0;j<B_neigs;j++) { 3208 if (eigs[j] == 0.0) { 3209 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3210 } else { 3211 if (pcbddc->use_deluxe_scaling) { 3212 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3213 } else { 3214 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3215 } 3216 } 3217 } 3218 } 3219 } else { 3220 /* TODO */ 3221 } 3222 } 3223 /* change the basis back to the original one */ 3224 if (sub_schurs->change) { 3225 Mat change,phi,phit; 3226 3227 if (pcbddc->dbg_flag > 1) { 3228 PetscInt ii; 3229 for (ii=0;ii<B_neigs;ii++) { 3230 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3231 for (j=0;j<B_N;j++) { 3232 #if defined(PETSC_USE_COMPLEX) 3233 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3234 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3236 #else 3237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3238 #endif 3239 } 3240 } 3241 } 3242 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3243 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3244 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3245 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3246 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3247 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3248 } 3249 maxneigs = PetscMax(B_neigs,maxneigs); 3250 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3251 if (B_neigs) { 3252 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3253 3254 if (pcbddc->dbg_flag > 1) { 3255 PetscInt ii; 3256 for (ii=0;ii<B_neigs;ii++) { 3257 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3258 for (j=0;j<B_N;j++) { 3259 #if defined(PETSC_USE_COMPLEX) 3260 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3261 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3263 #else 3264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3265 #endif 3266 } 3267 } 3268 } 3269 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3270 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3271 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3272 cum++; 3273 } 3274 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3275 /* shift for next computation */ 3276 cumarray += subset_size*subset_size; 3277 } 3278 if (pcbddc->dbg_flag) { 3279 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3280 } 3281 3282 if (mss) { 3283 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3284 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3285 /* destroy matrices (junk) */ 3286 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3287 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3288 } 3289 if (allocated_S_St) { 3290 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3291 } 3292 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3293 #if defined(PETSC_USE_COMPLEX) 3294 ierr = PetscFree(rwork);CHKERRQ(ierr); 3295 #endif 3296 if (pcbddc->dbg_flag) { 3297 PetscInt maxneigs_r; 3298 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3299 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3300 } 3301 PetscFunctionReturn(0); 3302 } 3303 3304 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3305 { 3306 PetscScalar *coarse_submat_vals; 3307 PetscErrorCode ierr; 3308 3309 PetscFunctionBegin; 3310 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3311 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3312 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3313 3314 /* Setup local neumann solver ksp_R */ 3315 /* PCBDDCSetUpLocalScatters should be called first! */ 3316 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3317 3318 /* 3319 Setup local correction and local part of coarse basis. 3320 Gives back the dense local part of the coarse matrix in column major ordering 3321 */ 3322 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3323 3324 /* Compute total number of coarse nodes and setup coarse solver */ 3325 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3326 3327 /* free */ 3328 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3329 PetscFunctionReturn(0); 3330 } 3331 3332 PetscErrorCode PCBDDCResetCustomization(PC pc) 3333 { 3334 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3335 PetscErrorCode ierr; 3336 3337 PetscFunctionBegin; 3338 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3339 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3340 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3341 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3342 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3343 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3344 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3345 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3346 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3347 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3348 PetscFunctionReturn(0); 3349 } 3350 3351 PetscErrorCode PCBDDCResetTopography(PC pc) 3352 { 3353 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3354 PetscInt i; 3355 PetscErrorCode ierr; 3356 3357 PetscFunctionBegin; 3358 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3359 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3360 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3361 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3362 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3363 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3364 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3365 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3366 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3367 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3368 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3369 for (i=0;i<pcbddc->n_local_subs;i++) { 3370 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3371 } 3372 pcbddc->n_local_subs = 0; 3373 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3374 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3375 pcbddc->graphanalyzed = PETSC_FALSE; 3376 pcbddc->recompute_topography = PETSC_TRUE; 3377 PetscFunctionReturn(0); 3378 } 3379 3380 PetscErrorCode PCBDDCResetSolvers(PC pc) 3381 { 3382 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3383 PetscErrorCode ierr; 3384 3385 PetscFunctionBegin; 3386 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3387 if (pcbddc->coarse_phi_B) { 3388 PetscScalar *array; 3389 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3390 ierr = PetscFree(array);CHKERRQ(ierr); 3391 } 3392 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3393 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3394 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3395 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3396 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3397 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3398 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3399 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3400 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3401 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3402 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3403 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3404 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3405 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3406 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3407 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3408 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3409 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3410 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3411 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3412 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3413 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3414 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3415 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3416 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3417 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3418 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3419 if (pcbddc->benign_zerodiag_subs) { 3420 PetscInt i; 3421 for (i=0;i<pcbddc->benign_n;i++) { 3422 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3423 } 3424 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3425 } 3426 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3427 PetscFunctionReturn(0); 3428 } 3429 3430 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3431 { 3432 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3433 PC_IS *pcis = (PC_IS*)pc->data; 3434 VecType impVecType; 3435 PetscInt n_constraints,n_R,old_size; 3436 PetscErrorCode ierr; 3437 3438 PetscFunctionBegin; 3439 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3440 n_R = pcis->n - pcbddc->n_vertices; 3441 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3442 /* local work vectors (try to avoid unneeded work)*/ 3443 /* R nodes */ 3444 old_size = -1; 3445 if (pcbddc->vec1_R) { 3446 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3447 } 3448 if (n_R != old_size) { 3449 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3450 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3451 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3452 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3453 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3454 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3455 } 3456 /* local primal dofs */ 3457 old_size = -1; 3458 if (pcbddc->vec1_P) { 3459 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3460 } 3461 if (pcbddc->local_primal_size != old_size) { 3462 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3463 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3464 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3465 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3466 } 3467 /* local explicit constraints */ 3468 old_size = -1; 3469 if (pcbddc->vec1_C) { 3470 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3471 } 3472 if (n_constraints && n_constraints != old_size) { 3473 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3474 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3475 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3476 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3477 } 3478 PetscFunctionReturn(0); 3479 } 3480 3481 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3482 { 3483 PetscErrorCode ierr; 3484 /* pointers to pcis and pcbddc */ 3485 PC_IS* pcis = (PC_IS*)pc->data; 3486 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3487 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3488 /* submatrices of local problem */ 3489 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3490 /* submatrices of local coarse problem */ 3491 Mat S_VV,S_CV,S_VC,S_CC; 3492 /* working matrices */ 3493 Mat C_CR; 3494 /* additional working stuff */ 3495 PC pc_R; 3496 Mat F,Brhs = NULL; 3497 Vec dummy_vec; 3498 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3499 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3500 PetscScalar *work; 3501 PetscInt *idx_V_B; 3502 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3503 PetscInt i,n_R,n_D,n_B; 3504 3505 /* some shortcuts to scalars */ 3506 PetscScalar one=1.0,m_one=-1.0; 3507 3508 PetscFunctionBegin; 3509 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3510 3511 /* Set Non-overlapping dimensions */ 3512 n_vertices = pcbddc->n_vertices; 3513 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3514 n_B = pcis->n_B; 3515 n_D = pcis->n - n_B; 3516 n_R = pcis->n - n_vertices; 3517 3518 /* vertices in boundary numbering */ 3519 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3520 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3521 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3522 3523 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3524 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3526 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3527 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3528 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3529 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3530 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3531 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3532 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3533 3534 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3535 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3536 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3537 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3538 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3539 lda_rhs = n_R; 3540 need_benign_correction = PETSC_FALSE; 3541 if (isLU || isILU || isCHOL) { 3542 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3543 } else if (sub_schurs && sub_schurs->reuse_solver) { 3544 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3545 MatFactorType type; 3546 3547 F = reuse_solver->F; 3548 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3549 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3550 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3551 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3552 } else { 3553 F = NULL; 3554 } 3555 3556 /* determine if we can use a sparse right-hand side */ 3557 sparserhs = PETSC_FALSE; 3558 if (F) { 3559 const MatSolverPackage solver; 3560 3561 ierr = MatFactorGetSolverPackage(F,&solver);CHKERRQ(ierr); 3562 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3563 } 3564 3565 /* allocate workspace */ 3566 n = 0; 3567 if (n_constraints) { 3568 n += lda_rhs*n_constraints; 3569 } 3570 if (n_vertices) { 3571 n = PetscMax(2*lda_rhs*n_vertices,n); 3572 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3573 } 3574 if (!pcbddc->symmetric_primal) { 3575 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3576 } 3577 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3578 3579 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3580 dummy_vec = NULL; 3581 if (need_benign_correction && lda_rhs != n_R && F) { 3582 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3583 } 3584 3585 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3586 if (n_constraints) { 3587 Mat M1,M2,M3,C_B; 3588 IS is_aux; 3589 PetscScalar *array,*array2; 3590 3591 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3592 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3593 3594 /* Extract constraints on R nodes: C_{CR} */ 3595 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3596 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3597 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3598 3599 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3600 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3601 if (!sparserhs) { 3602 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3603 for (i=0;i<n_constraints;i++) { 3604 const PetscScalar *row_cmat_values; 3605 const PetscInt *row_cmat_indices; 3606 PetscInt size_of_constraint,j; 3607 3608 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3609 for (j=0;j<size_of_constraint;j++) { 3610 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3611 } 3612 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3613 } 3614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3615 } else { 3616 Mat tC_CR; 3617 3618 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3619 if (lda_rhs != n_R) { 3620 PetscScalar *aa; 3621 PetscInt r,*ii,*jj; 3622 PetscBool done; 3623 3624 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3625 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3626 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3627 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3628 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3629 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3630 } else { 3631 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3632 tC_CR = C_CR; 3633 } 3634 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3635 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3636 } 3637 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3638 if (F) { 3639 if (need_benign_correction) { 3640 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3641 3642 /* rhs is already zero on interior dofs, no need to change the rhs */ 3643 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3644 } 3645 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3646 if (need_benign_correction) { 3647 PetscScalar *marr; 3648 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3649 3650 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3651 if (lda_rhs != n_R) { 3652 for (i=0;i<n_constraints;i++) { 3653 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3654 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3655 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3656 } 3657 } else { 3658 for (i=0;i<n_constraints;i++) { 3659 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3660 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3661 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3662 } 3663 } 3664 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3665 } 3666 } else { 3667 PetscScalar *marr; 3668 3669 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3670 for (i=0;i<n_constraints;i++) { 3671 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3672 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3673 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3674 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3675 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3676 } 3677 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3678 } 3679 if (sparserhs) { 3680 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3681 } 3682 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3683 if (!pcbddc->switch_static) { 3684 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3685 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3686 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3687 for (i=0;i<n_constraints;i++) { 3688 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3689 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3690 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3691 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3692 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3693 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3694 } 3695 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3696 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3697 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3698 } else { 3699 if (lda_rhs != n_R) { 3700 IS dummy; 3701 3702 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3703 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3704 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3705 } else { 3706 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3707 pcbddc->local_auxmat2 = local_auxmat2_R; 3708 } 3709 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3710 } 3711 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3712 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3713 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3714 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3715 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3716 if (isCHOL) { 3717 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3718 } else { 3719 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3720 } 3721 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3722 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3723 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3724 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3725 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3726 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3727 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3728 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3729 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3730 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3731 } 3732 3733 /* Get submatrices from subdomain matrix */ 3734 if (n_vertices) { 3735 IS is_aux; 3736 PetscBool isseqaij; 3737 3738 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3739 IS tis; 3740 3741 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3742 ierr = ISSort(tis);CHKERRQ(ierr); 3743 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3744 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3745 } else { 3746 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3747 } 3748 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3749 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3750 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3751 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3752 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3753 } 3754 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3755 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3756 } 3757 3758 /* Matrix of coarse basis functions (local) */ 3759 if (pcbddc->coarse_phi_B) { 3760 PetscInt on_B,on_primal,on_D=n_D; 3761 if (pcbddc->coarse_phi_D) { 3762 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3763 } 3764 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3765 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3766 PetscScalar *marray; 3767 3768 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3769 ierr = PetscFree(marray);CHKERRQ(ierr); 3770 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3771 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3772 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3773 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3774 } 3775 } 3776 3777 if (!pcbddc->coarse_phi_B) { 3778 PetscScalar *marr; 3779 3780 /* memory size */ 3781 n = n_B*pcbddc->local_primal_size; 3782 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3783 if (!pcbddc->symmetric_primal) n *= 2; 3784 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3785 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3786 marr += n_B*pcbddc->local_primal_size; 3787 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3788 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3789 marr += n_D*pcbddc->local_primal_size; 3790 } 3791 if (!pcbddc->symmetric_primal) { 3792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3793 marr += n_B*pcbddc->local_primal_size; 3794 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3796 } 3797 } else { 3798 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3799 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3800 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3801 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3802 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3803 } 3804 } 3805 } 3806 3807 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3808 p0_lidx_I = NULL; 3809 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3810 const PetscInt *idxs; 3811 3812 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3813 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3814 for (i=0;i<pcbddc->benign_n;i++) { 3815 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3816 } 3817 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3818 } 3819 3820 /* vertices */ 3821 if (n_vertices) { 3822 PetscBool restoreavr = PETSC_FALSE; 3823 3824 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3825 3826 if (n_R) { 3827 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3828 PetscBLASInt B_N,B_one = 1; 3829 PetscScalar *x,*y; 3830 3831 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3832 if (need_benign_correction) { 3833 ISLocalToGlobalMapping RtoN; 3834 IS is_p0; 3835 PetscInt *idxs_p0,n; 3836 3837 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3838 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3839 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3840 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3841 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3842 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3843 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3844 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3845 } 3846 3847 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3848 if (!sparserhs || need_benign_correction) { 3849 if (lda_rhs == n_R) { 3850 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3851 } else { 3852 PetscScalar *av,*array; 3853 const PetscInt *xadj,*adjncy; 3854 PetscInt n; 3855 PetscBool flg_row; 3856 3857 array = work+lda_rhs*n_vertices; 3858 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3859 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3860 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3861 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3862 for (i=0;i<n;i++) { 3863 PetscInt j; 3864 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3865 } 3866 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3867 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3868 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3869 } 3870 if (need_benign_correction) { 3871 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3872 PetscScalar *marr; 3873 3874 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3875 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3876 3877 | 0 0 0 | (V) 3878 L = | 0 0 -1 | (P-p0) 3879 | 0 0 -1 | (p0) 3880 3881 */ 3882 for (i=0;i<reuse_solver->benign_n;i++) { 3883 const PetscScalar *vals; 3884 const PetscInt *idxs,*idxs_zero; 3885 PetscInt n,j,nz; 3886 3887 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3888 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3889 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3890 for (j=0;j<n;j++) { 3891 PetscScalar val = vals[j]; 3892 PetscInt k,col = idxs[j]; 3893 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3894 } 3895 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3896 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3897 } 3898 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3899 } 3900 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3901 Brhs = A_RV; 3902 } else { 3903 Mat tA_RVT,A_RVT; 3904 3905 if (!pcbddc->symmetric_primal) { 3906 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3907 } else { 3908 restoreavr = PETSC_TRUE; 3909 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3910 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3911 A_RVT = A_VR; 3912 } 3913 if (lda_rhs != n_R) { 3914 PetscScalar *aa; 3915 PetscInt r,*ii,*jj; 3916 PetscBool done; 3917 3918 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3919 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed");CHKERRQ(ierr); 3920 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3921 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3922 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3923 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed");CHKERRQ(ierr); 3924 } else { 3925 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3926 tA_RVT = A_RVT; 3927 } 3928 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3929 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3930 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3931 } 3932 if (F) { 3933 /* need to correct the rhs */ 3934 if (need_benign_correction) { 3935 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3936 PetscScalar *marr; 3937 3938 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3939 if (lda_rhs != n_R) { 3940 for (i=0;i<n_vertices;i++) { 3941 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3942 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3943 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3944 } 3945 } else { 3946 for (i=0;i<n_vertices;i++) { 3947 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3948 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3949 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3950 } 3951 } 3952 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 3953 } 3954 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 3955 if (restoreavr) { 3956 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3957 } 3958 /* need to correct the solution */ 3959 if (need_benign_correction) { 3960 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3961 PetscScalar *marr; 3962 3963 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3964 if (lda_rhs != n_R) { 3965 for (i=0;i<n_vertices;i++) { 3966 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3967 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3968 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3969 } 3970 } else { 3971 for (i=0;i<n_vertices;i++) { 3972 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3973 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3974 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3975 } 3976 } 3977 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3978 } 3979 } else { 3980 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 3981 for (i=0;i<n_vertices;i++) { 3982 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3983 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3984 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3986 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3987 } 3988 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 3989 } 3990 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3991 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3992 /* S_VV and S_CV */ 3993 if (n_constraints) { 3994 Mat B; 3995 3996 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3997 for (i=0;i<n_vertices;i++) { 3998 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3999 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4000 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4001 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4002 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4003 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4004 } 4005 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4006 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4007 ierr = MatDestroy(&B);CHKERRQ(ierr); 4008 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4009 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4010 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4011 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4012 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4013 ierr = MatDestroy(&B);CHKERRQ(ierr); 4014 } 4015 if (lda_rhs != n_R) { 4016 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4017 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4018 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4019 } 4020 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4021 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4022 if (need_benign_correction) { 4023 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4024 PetscScalar *marr,*sums; 4025 4026 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4027 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4028 for (i=0;i<reuse_solver->benign_n;i++) { 4029 const PetscScalar *vals; 4030 const PetscInt *idxs,*idxs_zero; 4031 PetscInt n,j,nz; 4032 4033 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4034 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4035 for (j=0;j<n_vertices;j++) { 4036 PetscInt k; 4037 sums[j] = 0.; 4038 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4039 } 4040 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4041 for (j=0;j<n;j++) { 4042 PetscScalar val = vals[j]; 4043 PetscInt k; 4044 for (k=0;k<n_vertices;k++) { 4045 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4046 } 4047 } 4048 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4049 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4050 } 4051 ierr = PetscFree(sums);CHKERRQ(ierr); 4052 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4053 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4054 } 4055 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4056 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4057 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4058 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4059 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4060 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4061 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4062 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4063 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4064 } else { 4065 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4066 } 4067 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4068 4069 /* coarse basis functions */ 4070 for (i=0;i<n_vertices;i++) { 4071 PetscScalar *y; 4072 4073 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4074 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4075 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4076 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4077 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4078 y[n_B*i+idx_V_B[i]] = 1.0; 4079 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4080 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4081 4082 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4083 PetscInt j; 4084 4085 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4086 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4087 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4088 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4089 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4090 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4091 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4092 } 4093 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4094 } 4095 /* if n_R == 0 the object is not destroyed */ 4096 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4097 } 4098 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4099 4100 if (n_constraints) { 4101 Mat B; 4102 4103 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4104 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4105 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4106 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4107 if (n_vertices) { 4108 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4109 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4110 } else { 4111 Mat S_VCt; 4112 4113 if (lda_rhs != n_R) { 4114 ierr = MatDestroy(&B);CHKERRQ(ierr); 4115 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4116 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4117 } 4118 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4119 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4120 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4121 } 4122 } 4123 ierr = MatDestroy(&B);CHKERRQ(ierr); 4124 /* coarse basis functions */ 4125 for (i=0;i<n_constraints;i++) { 4126 PetscScalar *y; 4127 4128 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4129 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4130 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4131 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4132 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4133 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4134 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4135 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4136 PetscInt j; 4137 4138 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4139 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4140 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4141 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4142 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4143 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4144 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4145 } 4146 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4147 } 4148 } 4149 if (n_constraints) { 4150 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4151 } 4152 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4153 4154 /* coarse matrix entries relative to B_0 */ 4155 if (pcbddc->benign_n) { 4156 Mat B0_B,B0_BPHI; 4157 IS is_dummy; 4158 PetscScalar *data; 4159 PetscInt j; 4160 4161 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4162 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4163 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4164 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4165 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4166 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4167 for (j=0;j<pcbddc->benign_n;j++) { 4168 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4169 for (i=0;i<pcbddc->local_primal_size;i++) { 4170 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4171 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4172 } 4173 } 4174 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4175 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4176 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4177 } 4178 4179 /* compute other basis functions for non-symmetric problems */ 4180 if (!pcbddc->symmetric_primal) { 4181 Mat B_V=NULL,B_C=NULL; 4182 PetscScalar *marray; 4183 4184 if (n_constraints) { 4185 Mat S_CCT,C_CRT; 4186 4187 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4188 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4189 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4190 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4191 if (n_vertices) { 4192 Mat S_VCT; 4193 4194 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4195 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4196 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4197 } 4198 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4199 } else { 4200 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4201 } 4202 if (n_vertices && n_R) { 4203 PetscScalar *av,*marray; 4204 const PetscInt *xadj,*adjncy; 4205 PetscInt n; 4206 PetscBool flg_row; 4207 4208 /* B_V = B_V - A_VR^T */ 4209 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4210 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4211 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4212 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4213 for (i=0;i<n;i++) { 4214 PetscInt j; 4215 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4216 } 4217 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4218 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4219 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4220 } 4221 4222 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4223 if (n_vertices) { 4224 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4225 for (i=0;i<n_vertices;i++) { 4226 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4227 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4228 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4230 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4231 } 4232 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4233 } 4234 if (B_C) { 4235 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4236 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4237 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4238 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4239 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4240 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4241 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4242 } 4243 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4244 } 4245 /* coarse basis functions */ 4246 for (i=0;i<pcbddc->local_primal_size;i++) { 4247 PetscScalar *y; 4248 4249 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4250 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4251 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4252 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4253 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4254 if (i<n_vertices) { 4255 y[n_B*i+idx_V_B[i]] = 1.0; 4256 } 4257 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4258 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4259 4260 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4261 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4262 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4263 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4264 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4265 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4266 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4267 } 4268 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4269 } 4270 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4271 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4272 } 4273 4274 /* free memory */ 4275 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4276 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4277 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4278 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4279 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4280 ierr = PetscFree(work);CHKERRQ(ierr); 4281 if (n_vertices) { 4282 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4283 } 4284 if (n_constraints) { 4285 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4286 } 4287 /* Checking coarse_sub_mat and coarse basis functios */ 4288 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4289 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4290 if (pcbddc->dbg_flag) { 4291 Mat coarse_sub_mat; 4292 Mat AUXMAT,TM1,TM2,TM3,TM4; 4293 Mat coarse_phi_D,coarse_phi_B; 4294 Mat coarse_psi_D,coarse_psi_B; 4295 Mat A_II,A_BB,A_IB,A_BI; 4296 Mat C_B,CPHI; 4297 IS is_dummy; 4298 Vec mones; 4299 MatType checkmattype=MATSEQAIJ; 4300 PetscReal real_value; 4301 4302 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4303 Mat A; 4304 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4305 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4306 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4307 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4308 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4309 ierr = MatDestroy(&A);CHKERRQ(ierr); 4310 } else { 4311 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4312 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4313 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4314 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4315 } 4316 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4317 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4318 if (!pcbddc->symmetric_primal) { 4319 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4320 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4321 } 4322 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4323 4324 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4325 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4326 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4327 if (!pcbddc->symmetric_primal) { 4328 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4329 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4330 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4331 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4332 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4333 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4334 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4335 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4336 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4337 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4338 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4339 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4340 } else { 4341 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4342 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4343 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4344 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4345 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4346 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4347 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4348 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4349 } 4350 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4351 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4352 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4353 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4354 if (pcbddc->benign_n) { 4355 Mat B0_B,B0_BPHI; 4356 PetscScalar *data,*data2; 4357 PetscInt j; 4358 4359 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4360 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4361 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4362 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4363 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4364 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4365 for (j=0;j<pcbddc->benign_n;j++) { 4366 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4367 for (i=0;i<pcbddc->local_primal_size;i++) { 4368 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4369 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4370 } 4371 } 4372 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4373 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4374 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4375 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4376 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4377 } 4378 #if 0 4379 { 4380 PetscViewer viewer; 4381 char filename[256]; 4382 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4383 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4384 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4385 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4386 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4387 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4388 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4389 if (save_change) { 4390 Mat phi_B; 4391 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4392 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4393 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4394 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4395 } else { 4396 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4397 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4398 } 4399 if (pcbddc->coarse_phi_D) { 4400 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4401 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4402 } 4403 if (pcbddc->coarse_psi_B) { 4404 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4405 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4406 } 4407 if (pcbddc->coarse_psi_D) { 4408 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4409 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4410 } 4411 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4412 } 4413 #endif 4414 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4415 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4416 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4418 4419 /* check constraints */ 4420 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4421 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4422 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4423 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4424 } else { 4425 PetscScalar *data; 4426 Mat tmat; 4427 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4428 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4429 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4430 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4431 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4432 } 4433 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4434 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4435 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4436 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4438 if (!pcbddc->symmetric_primal) { 4439 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4440 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4441 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4442 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4443 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4444 } 4445 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4446 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4447 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4448 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4450 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4451 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4452 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4453 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4454 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4455 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4456 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4457 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4458 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4459 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4460 if (!pcbddc->symmetric_primal) { 4461 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4462 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4463 } 4464 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4465 } 4466 /* get back data */ 4467 *coarse_submat_vals_n = coarse_submat_vals; 4468 PetscFunctionReturn(0); 4469 } 4470 4471 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4472 { 4473 Mat *work_mat; 4474 IS isrow_s,iscol_s; 4475 PetscBool rsorted,csorted; 4476 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4477 PetscErrorCode ierr; 4478 4479 PetscFunctionBegin; 4480 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4481 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4482 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4483 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4484 4485 if (!rsorted) { 4486 const PetscInt *idxs; 4487 PetscInt *idxs_sorted,i; 4488 4489 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4490 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4491 for (i=0;i<rsize;i++) { 4492 idxs_perm_r[i] = i; 4493 } 4494 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4495 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4496 for (i=0;i<rsize;i++) { 4497 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4498 } 4499 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4500 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4501 } else { 4502 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4503 isrow_s = isrow; 4504 } 4505 4506 if (!csorted) { 4507 if (isrow == iscol) { 4508 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4509 iscol_s = isrow_s; 4510 } else { 4511 const PetscInt *idxs; 4512 PetscInt *idxs_sorted,i; 4513 4514 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4515 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4516 for (i=0;i<csize;i++) { 4517 idxs_perm_c[i] = i; 4518 } 4519 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4520 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4521 for (i=0;i<csize;i++) { 4522 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4523 } 4524 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4525 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4526 } 4527 } else { 4528 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4529 iscol_s = iscol; 4530 } 4531 4532 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4533 4534 if (!rsorted || !csorted) { 4535 Mat new_mat; 4536 IS is_perm_r,is_perm_c; 4537 4538 if (!rsorted) { 4539 PetscInt *idxs_r,i; 4540 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4541 for (i=0;i<rsize;i++) { 4542 idxs_r[idxs_perm_r[i]] = i; 4543 } 4544 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4545 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4546 } else { 4547 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4548 } 4549 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4550 4551 if (!csorted) { 4552 if (isrow_s == iscol_s) { 4553 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4554 is_perm_c = is_perm_r; 4555 } else { 4556 PetscInt *idxs_c,i; 4557 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4558 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4559 for (i=0;i<csize;i++) { 4560 idxs_c[idxs_perm_c[i]] = i; 4561 } 4562 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4563 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4564 } 4565 } else { 4566 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4567 } 4568 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4569 4570 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4571 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4572 work_mat[0] = new_mat; 4573 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4574 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4575 } 4576 4577 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4578 *B = work_mat[0]; 4579 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4580 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4581 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4582 PetscFunctionReturn(0); 4583 } 4584 4585 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4586 { 4587 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4588 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4589 Mat new_mat,lA; 4590 IS is_local,is_global; 4591 PetscInt local_size; 4592 PetscBool isseqaij; 4593 PetscErrorCode ierr; 4594 4595 PetscFunctionBegin; 4596 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4597 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4598 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4599 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4600 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4601 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4602 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4603 4604 /* check */ 4605 if (pcbddc->dbg_flag) { 4606 Vec x,x_change; 4607 PetscReal error; 4608 4609 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4610 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4611 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4612 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4613 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4614 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4615 if (!pcbddc->change_interior) { 4616 const PetscScalar *x,*y,*v; 4617 PetscReal lerror = 0.; 4618 PetscInt i; 4619 4620 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4621 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4622 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4623 for (i=0;i<local_size;i++) 4624 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4625 lerror = PetscAbsScalar(x[i]-y[i]); 4626 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4627 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4628 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4629 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4630 if (error > PETSC_SMALL) { 4631 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4632 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4633 } else { 4634 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4635 } 4636 } 4637 } 4638 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4639 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4640 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4641 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4642 if (error > PETSC_SMALL) { 4643 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4644 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4645 } else { 4646 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4647 } 4648 } 4649 ierr = VecDestroy(&x);CHKERRQ(ierr); 4650 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4651 } 4652 4653 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4654 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4655 4656 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4657 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4658 if (isseqaij) { 4659 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4660 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4661 if (lA) { 4662 Mat work; 4663 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4664 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4665 ierr = MatDestroy(&work);CHKERRQ(ierr); 4666 } 4667 } else { 4668 Mat work_mat; 4669 4670 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4671 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4672 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4673 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4674 if (lA) { 4675 Mat work; 4676 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4677 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4678 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4679 ierr = MatDestroy(&work);CHKERRQ(ierr); 4680 } 4681 } 4682 if (matis->A->symmetric_set) { 4683 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4684 #if !defined(PETSC_USE_COMPLEX) 4685 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4686 #endif 4687 } 4688 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4689 PetscFunctionReturn(0); 4690 } 4691 4692 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4693 { 4694 PC_IS* pcis = (PC_IS*)(pc->data); 4695 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4696 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4697 PetscInt *idx_R_local=NULL; 4698 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4699 PetscInt vbs,bs; 4700 PetscBT bitmask=NULL; 4701 PetscErrorCode ierr; 4702 4703 PetscFunctionBegin; 4704 /* 4705 No need to setup local scatters if 4706 - primal space is unchanged 4707 AND 4708 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4709 AND 4710 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4711 */ 4712 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4713 PetscFunctionReturn(0); 4714 } 4715 /* destroy old objects */ 4716 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4717 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4718 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4719 /* Set Non-overlapping dimensions */ 4720 n_B = pcis->n_B; 4721 n_D = pcis->n - n_B; 4722 n_vertices = pcbddc->n_vertices; 4723 4724 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4725 4726 /* create auxiliary bitmask and allocate workspace */ 4727 if (!sub_schurs || !sub_schurs->reuse_solver) { 4728 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4729 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4730 for (i=0;i<n_vertices;i++) { 4731 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4732 } 4733 4734 for (i=0, n_R=0; i<pcis->n; i++) { 4735 if (!PetscBTLookup(bitmask,i)) { 4736 idx_R_local[n_R++] = i; 4737 } 4738 } 4739 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4740 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4741 4742 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4743 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4744 } 4745 4746 /* Block code */ 4747 vbs = 1; 4748 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4749 if (bs>1 && !(n_vertices%bs)) { 4750 PetscBool is_blocked = PETSC_TRUE; 4751 PetscInt *vary; 4752 if (!sub_schurs || !sub_schurs->reuse_solver) { 4753 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4754 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4755 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4756 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4757 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4758 for (i=0; i<pcis->n/bs; i++) { 4759 if (vary[i]!=0 && vary[i]!=bs) { 4760 is_blocked = PETSC_FALSE; 4761 break; 4762 } 4763 } 4764 ierr = PetscFree(vary);CHKERRQ(ierr); 4765 } else { 4766 /* Verify directly the R set */ 4767 for (i=0; i<n_R/bs; i++) { 4768 PetscInt j,node=idx_R_local[bs*i]; 4769 for (j=1; j<bs; j++) { 4770 if (node != idx_R_local[bs*i+j]-j) { 4771 is_blocked = PETSC_FALSE; 4772 break; 4773 } 4774 } 4775 } 4776 } 4777 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4778 vbs = bs; 4779 for (i=0;i<n_R/vbs;i++) { 4780 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4781 } 4782 } 4783 } 4784 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4785 if (sub_schurs && sub_schurs->reuse_solver) { 4786 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4787 4788 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4789 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4790 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4791 reuse_solver->is_R = pcbddc->is_R_local; 4792 } else { 4793 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4794 } 4795 4796 /* print some info if requested */ 4797 if (pcbddc->dbg_flag) { 4798 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4799 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4800 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4802 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4803 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4805 } 4806 4807 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4808 if (!sub_schurs || !sub_schurs->reuse_solver) { 4809 IS is_aux1,is_aux2; 4810 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4811 4812 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4813 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4814 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4815 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4816 for (i=0; i<n_D; i++) { 4817 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4818 } 4819 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4820 for (i=0, j=0; i<n_R; i++) { 4821 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4822 aux_array1[j++] = i; 4823 } 4824 } 4825 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4826 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4827 for (i=0, j=0; i<n_B; i++) { 4828 if (!PetscBTLookup(bitmask,is_indices[i])) { 4829 aux_array2[j++] = i; 4830 } 4831 } 4832 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4833 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4834 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4835 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4836 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4837 4838 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4839 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4840 for (i=0, j=0; i<n_R; i++) { 4841 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4842 aux_array1[j++] = i; 4843 } 4844 } 4845 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4846 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4847 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4848 } 4849 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4850 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4851 } else { 4852 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4853 IS tis; 4854 PetscInt schur_size; 4855 4856 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4857 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4858 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4859 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4860 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4861 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4862 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4863 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4864 } 4865 } 4866 PetscFunctionReturn(0); 4867 } 4868 4869 4870 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4871 { 4872 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4873 PC_IS *pcis = (PC_IS*)pc->data; 4874 PC pc_temp; 4875 Mat A_RR; 4876 MatReuse reuse; 4877 PetscScalar m_one = -1.0; 4878 PetscReal value; 4879 PetscInt n_D,n_R; 4880 PetscBool check_corr[2],issbaij; 4881 PetscErrorCode ierr; 4882 /* prefixes stuff */ 4883 char dir_prefix[256],neu_prefix[256],str_level[16]; 4884 size_t len; 4885 4886 PetscFunctionBegin; 4887 4888 /* compute prefixes */ 4889 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4890 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4891 if (!pcbddc->current_level) { 4892 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4893 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4894 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4895 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4896 } else { 4897 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4898 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4899 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4900 len -= 15; /* remove "pc_bddc_coarse_" */ 4901 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4902 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4903 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4904 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4905 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4906 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4907 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4908 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4909 } 4910 4911 /* DIRICHLET PROBLEM */ 4912 if (dirichlet) { 4913 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4914 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4915 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4916 if (pcbddc->dbg_flag) { 4917 Mat A_IIn; 4918 4919 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4920 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4921 pcis->A_II = A_IIn; 4922 } 4923 } 4924 if (pcbddc->local_mat->symmetric_set) { 4925 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4926 } 4927 /* Matrix for Dirichlet problem is pcis->A_II */ 4928 n_D = pcis->n - pcis->n_B; 4929 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4930 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4931 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4932 /* default */ 4933 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4934 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4935 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4936 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4937 if (issbaij) { 4938 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4939 } else { 4940 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4941 } 4942 /* Allow user's customization */ 4943 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4944 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4945 } 4946 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4947 if (sub_schurs && sub_schurs->reuse_solver) { 4948 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4949 4950 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4951 } 4952 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4953 if (!n_D) { 4954 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4955 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4956 } 4957 /* Set Up KSP for Dirichlet problem of BDDC */ 4958 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4959 /* set ksp_D into pcis data */ 4960 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4961 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4962 pcis->ksp_D = pcbddc->ksp_D; 4963 } 4964 4965 /* NEUMANN PROBLEM */ 4966 A_RR = 0; 4967 if (neumann) { 4968 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4969 PetscInt ibs,mbs; 4970 PetscBool issbaij; 4971 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4972 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4973 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4974 if (pcbddc->ksp_R) { /* already created ksp */ 4975 PetscInt nn_R; 4976 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4977 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4978 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4979 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4980 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4981 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4982 reuse = MAT_INITIAL_MATRIX; 4983 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4984 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4985 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4986 reuse = MAT_INITIAL_MATRIX; 4987 } else { /* safe to reuse the matrix */ 4988 reuse = MAT_REUSE_MATRIX; 4989 } 4990 } 4991 /* last check */ 4992 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4993 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4994 reuse = MAT_INITIAL_MATRIX; 4995 } 4996 } else { /* first time, so we need to create the matrix */ 4997 reuse = MAT_INITIAL_MATRIX; 4998 } 4999 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5000 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5001 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5002 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5003 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5004 if (matis->A == pcbddc->local_mat) { 5005 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5006 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5007 } else { 5008 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5009 } 5010 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5011 if (matis->A == pcbddc->local_mat) { 5012 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5013 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5014 } else { 5015 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5016 } 5017 } 5018 /* extract A_RR */ 5019 if (sub_schurs && sub_schurs->reuse_solver) { 5020 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5021 5022 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5023 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5024 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5025 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5026 } else { 5027 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5028 } 5029 } else { 5030 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5031 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5032 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5033 } 5034 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5035 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5036 } 5037 if (pcbddc->local_mat->symmetric_set) { 5038 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5039 } 5040 if (!pcbddc->ksp_R) { /* create object if not present */ 5041 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5042 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5043 /* default */ 5044 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5045 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5046 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5047 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5048 if (issbaij) { 5049 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5050 } else { 5051 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5052 } 5053 /* Allow user's customization */ 5054 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5055 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 5056 } 5057 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5058 if (!n_R) { 5059 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5060 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5061 } 5062 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5063 /* Reuse solver if it is present */ 5064 if (sub_schurs && sub_schurs->reuse_solver && sub_schurs->A == pcbddc->local_mat) { 5065 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5066 5067 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5068 } 5069 /* Set Up KSP for Neumann problem of BDDC */ 5070 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5071 } 5072 5073 if (pcbddc->dbg_flag) { 5074 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5075 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5076 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5077 } 5078 5079 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5080 check_corr[0] = check_corr[1] = PETSC_FALSE; 5081 if (pcbddc->NullSpace_corr[0]) { 5082 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5083 } 5084 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5085 check_corr[0] = PETSC_TRUE; 5086 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5087 } 5088 if (neumann && pcbddc->NullSpace_corr[2]) { 5089 check_corr[1] = PETSC_TRUE; 5090 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5091 } 5092 5093 /* check Dirichlet and Neumann solvers */ 5094 if (pcbddc->dbg_flag) { 5095 if (dirichlet) { /* Dirichlet */ 5096 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5097 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5098 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5099 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5100 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5102 if (check_corr[0]) { 5103 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5104 } 5105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5106 } 5107 if (neumann) { /* Neumann */ 5108 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5109 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5110 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5111 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5112 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5113 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5114 if (check_corr[1]) { 5115 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5116 } 5117 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5118 } 5119 } 5120 /* free Neumann problem's matrix */ 5121 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5122 PetscFunctionReturn(0); 5123 } 5124 5125 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5126 { 5127 PetscErrorCode ierr; 5128 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5129 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5130 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5131 5132 PetscFunctionBegin; 5133 if (!reuse_solver) { 5134 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5135 } 5136 if (!pcbddc->switch_static) { 5137 if (applytranspose && pcbddc->local_auxmat1) { 5138 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5139 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5140 } 5141 if (!reuse_solver) { 5142 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5143 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5144 } else { 5145 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5146 5147 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5148 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5149 } 5150 } else { 5151 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5152 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5153 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5154 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5155 if (applytranspose && pcbddc->local_auxmat1) { 5156 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5157 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5158 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5159 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5160 } 5161 } 5162 if (!reuse_solver || pcbddc->switch_static) { 5163 if (applytranspose) { 5164 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5165 } else { 5166 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5167 } 5168 } else { 5169 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5170 5171 if (applytranspose) { 5172 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5173 } else { 5174 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5175 } 5176 } 5177 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5178 if (!pcbddc->switch_static) { 5179 if (!reuse_solver) { 5180 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5181 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5182 } else { 5183 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5184 5185 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5186 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5187 } 5188 if (!applytranspose && pcbddc->local_auxmat1) { 5189 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5190 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5191 } 5192 } else { 5193 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5194 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5195 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5196 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5197 if (!applytranspose && pcbddc->local_auxmat1) { 5198 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5199 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5200 } 5201 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5202 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5203 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5204 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5205 } 5206 PetscFunctionReturn(0); 5207 } 5208 5209 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5210 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5211 { 5212 PetscErrorCode ierr; 5213 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5214 PC_IS* pcis = (PC_IS*) (pc->data); 5215 const PetscScalar zero = 0.0; 5216 5217 PetscFunctionBegin; 5218 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5219 if (!pcbddc->benign_apply_coarse_only) { 5220 if (applytranspose) { 5221 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5222 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5223 } else { 5224 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5225 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5226 } 5227 } else { 5228 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5229 } 5230 5231 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5232 if (pcbddc->benign_n) { 5233 PetscScalar *array; 5234 PetscInt j; 5235 5236 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5237 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5238 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5239 } 5240 5241 /* start communications from local primal nodes to rhs of coarse solver */ 5242 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5243 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5244 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5245 5246 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5247 if (pcbddc->coarse_ksp) { 5248 Mat coarse_mat; 5249 Vec rhs,sol; 5250 MatNullSpace nullsp; 5251 PetscBool isbddc = PETSC_FALSE; 5252 5253 if (pcbddc->benign_have_null) { 5254 PC coarse_pc; 5255 5256 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5257 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5258 /* we need to propagate to coarser levels the need for a possible benign correction */ 5259 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5260 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5261 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5262 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5263 } 5264 } 5265 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5266 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5267 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5268 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5269 if (nullsp) { 5270 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5271 } 5272 if (applytranspose) { 5273 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5274 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5275 } else { 5276 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5277 PC coarse_pc; 5278 5279 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5280 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5281 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5282 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5283 } else { 5284 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5285 } 5286 } 5287 /* we don't need the benign correction at coarser levels anymore */ 5288 if (pcbddc->benign_have_null && isbddc) { 5289 PC coarse_pc; 5290 PC_BDDC* coarsepcbddc; 5291 5292 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5293 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5294 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5295 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5296 } 5297 if (nullsp) { 5298 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5299 } 5300 } 5301 5302 /* Local solution on R nodes */ 5303 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5304 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5305 } 5306 /* communications from coarse sol to local primal nodes */ 5307 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5308 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5309 5310 /* Sum contributions from the two levels */ 5311 if (!pcbddc->benign_apply_coarse_only) { 5312 if (applytranspose) { 5313 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5314 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5315 } else { 5316 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5317 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5318 } 5319 /* store p0 */ 5320 if (pcbddc->benign_n) { 5321 PetscScalar *array; 5322 PetscInt j; 5323 5324 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5325 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5326 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5327 } 5328 } else { /* expand the coarse solution */ 5329 if (applytranspose) { 5330 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5331 } else { 5332 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5333 } 5334 } 5335 PetscFunctionReturn(0); 5336 } 5337 5338 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5339 { 5340 PetscErrorCode ierr; 5341 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5342 PetscScalar *array; 5343 Vec from,to; 5344 5345 PetscFunctionBegin; 5346 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5347 from = pcbddc->coarse_vec; 5348 to = pcbddc->vec1_P; 5349 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5350 Vec tvec; 5351 5352 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5353 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5354 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5355 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5356 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5357 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5358 } 5359 } else { /* from local to global -> put data in coarse right hand side */ 5360 from = pcbddc->vec1_P; 5361 to = pcbddc->coarse_vec; 5362 } 5363 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5364 PetscFunctionReturn(0); 5365 } 5366 5367 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5368 { 5369 PetscErrorCode ierr; 5370 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5371 PetscScalar *array; 5372 Vec from,to; 5373 5374 PetscFunctionBegin; 5375 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5376 from = pcbddc->coarse_vec; 5377 to = pcbddc->vec1_P; 5378 } else { /* from local to global -> put data in coarse right hand side */ 5379 from = pcbddc->vec1_P; 5380 to = pcbddc->coarse_vec; 5381 } 5382 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5383 if (smode == SCATTER_FORWARD) { 5384 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5385 Vec tvec; 5386 5387 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5388 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5389 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5390 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5391 } 5392 } else { 5393 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5394 ierr = VecResetArray(from);CHKERRQ(ierr); 5395 } 5396 } 5397 PetscFunctionReturn(0); 5398 } 5399 5400 /* uncomment for testing purposes */ 5401 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5402 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5403 { 5404 PetscErrorCode ierr; 5405 PC_IS* pcis = (PC_IS*)(pc->data); 5406 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5407 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5408 /* one and zero */ 5409 PetscScalar one=1.0,zero=0.0; 5410 /* space to store constraints and their local indices */ 5411 PetscScalar *constraints_data; 5412 PetscInt *constraints_idxs,*constraints_idxs_B; 5413 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5414 PetscInt *constraints_n; 5415 /* iterators */ 5416 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5417 /* BLAS integers */ 5418 PetscBLASInt lwork,lierr; 5419 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5420 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5421 /* reuse */ 5422 PetscInt olocal_primal_size,olocal_primal_size_cc; 5423 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5424 /* change of basis */ 5425 PetscBool qr_needed; 5426 PetscBT change_basis,qr_needed_idx; 5427 /* auxiliary stuff */ 5428 PetscInt *nnz,*is_indices; 5429 PetscInt ncc; 5430 /* some quantities */ 5431 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5432 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5433 5434 PetscFunctionBegin; 5435 /* Destroy Mat objects computed previously */ 5436 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5437 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5438 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5439 /* save info on constraints from previous setup (if any) */ 5440 olocal_primal_size = pcbddc->local_primal_size; 5441 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5442 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5443 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5444 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5445 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5446 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5447 5448 if (!pcbddc->adaptive_selection) { 5449 IS ISForVertices,*ISForFaces,*ISForEdges; 5450 MatNullSpace nearnullsp; 5451 const Vec *nearnullvecs; 5452 Vec *localnearnullsp; 5453 PetscScalar *array; 5454 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5455 PetscBool nnsp_has_cnst; 5456 /* LAPACK working arrays for SVD or POD */ 5457 PetscBool skip_lapack,boolforchange; 5458 PetscScalar *work; 5459 PetscReal *singular_vals; 5460 #if defined(PETSC_USE_COMPLEX) 5461 PetscReal *rwork; 5462 #endif 5463 #if defined(PETSC_MISSING_LAPACK_GESVD) 5464 PetscScalar *temp_basis,*correlation_mat; 5465 #else 5466 PetscBLASInt dummy_int=1; 5467 PetscScalar dummy_scalar=1.; 5468 #endif 5469 5470 /* Get index sets for faces, edges and vertices from graph */ 5471 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5472 /* print some info */ 5473 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5474 PetscInt nv; 5475 5476 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5477 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5478 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5480 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5481 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5484 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5485 } 5486 5487 /* free unneeded index sets */ 5488 if (!pcbddc->use_vertices) { 5489 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5490 } 5491 if (!pcbddc->use_edges) { 5492 for (i=0;i<n_ISForEdges;i++) { 5493 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5494 } 5495 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5496 n_ISForEdges = 0; 5497 } 5498 if (!pcbddc->use_faces) { 5499 for (i=0;i<n_ISForFaces;i++) { 5500 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5501 } 5502 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5503 n_ISForFaces = 0; 5504 } 5505 5506 /* check if near null space is attached to global mat */ 5507 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5508 if (nearnullsp) { 5509 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5510 /* remove any stored info */ 5511 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5512 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5513 /* store information for BDDC solver reuse */ 5514 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5515 pcbddc->onearnullspace = nearnullsp; 5516 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5517 for (i=0;i<nnsp_size;i++) { 5518 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5519 } 5520 } else { /* if near null space is not provided BDDC uses constants by default */ 5521 nnsp_size = 0; 5522 nnsp_has_cnst = PETSC_TRUE; 5523 } 5524 /* get max number of constraints on a single cc */ 5525 max_constraints = nnsp_size; 5526 if (nnsp_has_cnst) max_constraints++; 5527 5528 /* 5529 Evaluate maximum storage size needed by the procedure 5530 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5531 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5532 There can be multiple constraints per connected component 5533 */ 5534 n_vertices = 0; 5535 if (ISForVertices) { 5536 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5537 } 5538 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5539 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5540 5541 total_counts = n_ISForFaces+n_ISForEdges; 5542 total_counts *= max_constraints; 5543 total_counts += n_vertices; 5544 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5545 5546 total_counts = 0; 5547 max_size_of_constraint = 0; 5548 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5549 IS used_is; 5550 if (i<n_ISForEdges) { 5551 used_is = ISForEdges[i]; 5552 } else { 5553 used_is = ISForFaces[i-n_ISForEdges]; 5554 } 5555 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5556 total_counts += j; 5557 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5558 } 5559 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5560 5561 /* get local part of global near null space vectors */ 5562 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5563 for (k=0;k<nnsp_size;k++) { 5564 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5565 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5566 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5567 } 5568 5569 /* whether or not to skip lapack calls */ 5570 skip_lapack = PETSC_TRUE; 5571 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5572 5573 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5574 if (!skip_lapack) { 5575 PetscScalar temp_work; 5576 5577 #if defined(PETSC_MISSING_LAPACK_GESVD) 5578 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5579 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5580 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5581 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5582 #if defined(PETSC_USE_COMPLEX) 5583 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5584 #endif 5585 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5586 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5587 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5588 lwork = -1; 5589 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5590 #if !defined(PETSC_USE_COMPLEX) 5591 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5592 #else 5593 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5594 #endif 5595 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5596 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5597 #else /* on missing GESVD */ 5598 /* SVD */ 5599 PetscInt max_n,min_n; 5600 max_n = max_size_of_constraint; 5601 min_n = max_constraints; 5602 if (max_size_of_constraint < max_constraints) { 5603 min_n = max_size_of_constraint; 5604 max_n = max_constraints; 5605 } 5606 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5607 #if defined(PETSC_USE_COMPLEX) 5608 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5609 #endif 5610 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5611 lwork = -1; 5612 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5613 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5614 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5615 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5616 #if !defined(PETSC_USE_COMPLEX) 5617 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5618 #else 5619 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5620 #endif 5621 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5622 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5623 #endif /* on missing GESVD */ 5624 /* Allocate optimal workspace */ 5625 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5626 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5627 } 5628 /* Now we can loop on constraining sets */ 5629 total_counts = 0; 5630 constraints_idxs_ptr[0] = 0; 5631 constraints_data_ptr[0] = 0; 5632 /* vertices */ 5633 if (n_vertices) { 5634 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5635 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5636 for (i=0;i<n_vertices;i++) { 5637 constraints_n[total_counts] = 1; 5638 constraints_data[total_counts] = 1.0; 5639 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5640 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5641 total_counts++; 5642 } 5643 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5644 n_vertices = total_counts; 5645 } 5646 5647 /* edges and faces */ 5648 total_counts_cc = total_counts; 5649 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5650 IS used_is; 5651 PetscBool idxs_copied = PETSC_FALSE; 5652 5653 if (ncc<n_ISForEdges) { 5654 used_is = ISForEdges[ncc]; 5655 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5656 } else { 5657 used_is = ISForFaces[ncc-n_ISForEdges]; 5658 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5659 } 5660 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5661 5662 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5663 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5664 /* change of basis should not be performed on local periodic nodes */ 5665 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5666 if (nnsp_has_cnst) { 5667 PetscScalar quad_value; 5668 5669 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5670 idxs_copied = PETSC_TRUE; 5671 5672 if (!pcbddc->use_nnsp_true) { 5673 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5674 } else { 5675 quad_value = 1.0; 5676 } 5677 for (j=0;j<size_of_constraint;j++) { 5678 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5679 } 5680 temp_constraints++; 5681 total_counts++; 5682 } 5683 for (k=0;k<nnsp_size;k++) { 5684 PetscReal real_value; 5685 PetscScalar *ptr_to_data; 5686 5687 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5688 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5689 for (j=0;j<size_of_constraint;j++) { 5690 ptr_to_data[j] = array[is_indices[j]]; 5691 } 5692 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5693 /* check if array is null on the connected component */ 5694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5695 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5696 if (real_value > 0.0) { /* keep indices and values */ 5697 temp_constraints++; 5698 total_counts++; 5699 if (!idxs_copied) { 5700 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5701 idxs_copied = PETSC_TRUE; 5702 } 5703 } 5704 } 5705 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5706 valid_constraints = temp_constraints; 5707 if (!pcbddc->use_nnsp_true && temp_constraints) { 5708 if (temp_constraints == 1) { /* just normalize the constraint */ 5709 PetscScalar norm,*ptr_to_data; 5710 5711 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5712 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5713 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5714 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5715 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5716 } else { /* perform SVD */ 5717 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5718 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5719 5720 #if defined(PETSC_MISSING_LAPACK_GESVD) 5721 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5722 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5723 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5724 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5725 from that computed using LAPACKgesvd 5726 -> This is due to a different computation of eigenvectors in LAPACKheev 5727 -> The quality of the POD-computed basis will be the same */ 5728 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5729 /* Store upper triangular part of correlation matrix */ 5730 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5731 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5732 for (j=0;j<temp_constraints;j++) { 5733 for (k=0;k<j+1;k++) { 5734 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5735 } 5736 } 5737 /* compute eigenvalues and eigenvectors of correlation matrix */ 5738 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5739 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5740 #if !defined(PETSC_USE_COMPLEX) 5741 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5742 #else 5743 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5744 #endif 5745 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5746 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5747 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5748 j = 0; 5749 while (j < temp_constraints && singular_vals[j] < tol) j++; 5750 total_counts = total_counts-j; 5751 valid_constraints = temp_constraints-j; 5752 /* scale and copy POD basis into used quadrature memory */ 5753 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5754 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5755 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5756 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5757 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5758 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5759 if (j<temp_constraints) { 5760 PetscInt ii; 5761 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5762 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5763 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5764 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5765 for (k=0;k<temp_constraints-j;k++) { 5766 for (ii=0;ii<size_of_constraint;ii++) { 5767 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5768 } 5769 } 5770 } 5771 #else /* on missing GESVD */ 5772 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5773 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5774 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5775 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5776 #if !defined(PETSC_USE_COMPLEX) 5777 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5778 #else 5779 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5780 #endif 5781 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5782 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5783 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5784 k = temp_constraints; 5785 if (k > size_of_constraint) k = size_of_constraint; 5786 j = 0; 5787 while (j < k && singular_vals[k-j-1] < tol) j++; 5788 valid_constraints = k-j; 5789 total_counts = total_counts-temp_constraints+valid_constraints; 5790 #endif /* on missing GESVD */ 5791 } 5792 } 5793 /* update pointers information */ 5794 if (valid_constraints) { 5795 constraints_n[total_counts_cc] = valid_constraints; 5796 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5797 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5798 /* set change_of_basis flag */ 5799 if (boolforchange) { 5800 PetscBTSet(change_basis,total_counts_cc); 5801 } 5802 total_counts_cc++; 5803 } 5804 } 5805 /* free workspace */ 5806 if (!skip_lapack) { 5807 ierr = PetscFree(work);CHKERRQ(ierr); 5808 #if defined(PETSC_USE_COMPLEX) 5809 ierr = PetscFree(rwork);CHKERRQ(ierr); 5810 #endif 5811 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5812 #if defined(PETSC_MISSING_LAPACK_GESVD) 5813 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5814 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5815 #endif 5816 } 5817 for (k=0;k<nnsp_size;k++) { 5818 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5819 } 5820 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5821 /* free index sets of faces, edges and vertices */ 5822 for (i=0;i<n_ISForFaces;i++) { 5823 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5824 } 5825 if (n_ISForFaces) { 5826 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5827 } 5828 for (i=0;i<n_ISForEdges;i++) { 5829 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5830 } 5831 if (n_ISForEdges) { 5832 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5833 } 5834 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5835 } else { 5836 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5837 5838 total_counts = 0; 5839 n_vertices = 0; 5840 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5841 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5842 } 5843 max_constraints = 0; 5844 total_counts_cc = 0; 5845 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5846 total_counts += pcbddc->adaptive_constraints_n[i]; 5847 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5848 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5849 } 5850 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5851 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5852 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5853 constraints_data = pcbddc->adaptive_constraints_data; 5854 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5855 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5856 total_counts_cc = 0; 5857 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5858 if (pcbddc->adaptive_constraints_n[i]) { 5859 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5860 } 5861 } 5862 #if 0 5863 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5864 for (i=0;i<total_counts_cc;i++) { 5865 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5866 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5867 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5868 printf(" %d",constraints_idxs[j]); 5869 } 5870 printf("\n"); 5871 printf("number of cc: %d\n",constraints_n[i]); 5872 } 5873 for (i=0;i<n_vertices;i++) { 5874 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5875 } 5876 for (i=0;i<sub_schurs->n_subs;i++) { 5877 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5878 } 5879 #endif 5880 5881 max_size_of_constraint = 0; 5882 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5883 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5884 /* Change of basis */ 5885 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5886 if (pcbddc->use_change_of_basis) { 5887 for (i=0;i<sub_schurs->n_subs;i++) { 5888 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5889 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5890 } 5891 } 5892 } 5893 } 5894 pcbddc->local_primal_size = total_counts; 5895 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5896 5897 /* map constraints_idxs in boundary numbering */ 5898 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5899 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5900 5901 /* Create constraint matrix */ 5902 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5903 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5904 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5905 5906 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5907 /* determine if a QR strategy is needed for change of basis */ 5908 qr_needed = PETSC_FALSE; 5909 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5910 total_primal_vertices=0; 5911 pcbddc->local_primal_size_cc = 0; 5912 for (i=0;i<total_counts_cc;i++) { 5913 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5914 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5915 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5916 pcbddc->local_primal_size_cc += 1; 5917 } else if (PetscBTLookup(change_basis,i)) { 5918 for (k=0;k<constraints_n[i];k++) { 5919 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5920 } 5921 pcbddc->local_primal_size_cc += constraints_n[i]; 5922 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5923 PetscBTSet(qr_needed_idx,i); 5924 qr_needed = PETSC_TRUE; 5925 } 5926 } else { 5927 pcbddc->local_primal_size_cc += 1; 5928 } 5929 } 5930 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5931 pcbddc->n_vertices = total_primal_vertices; 5932 /* permute indices in order to have a sorted set of vertices */ 5933 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5934 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5935 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5936 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5937 5938 /* nonzero structure of constraint matrix */ 5939 /* and get reference dof for local constraints */ 5940 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5941 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5942 5943 j = total_primal_vertices; 5944 total_counts = total_primal_vertices; 5945 cum = total_primal_vertices; 5946 for (i=n_vertices;i<total_counts_cc;i++) { 5947 if (!PetscBTLookup(change_basis,i)) { 5948 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5949 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5950 cum++; 5951 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5952 for (k=0;k<constraints_n[i];k++) { 5953 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5954 nnz[j+k] = size_of_constraint; 5955 } 5956 j += constraints_n[i]; 5957 } 5958 } 5959 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5960 ierr = PetscFree(nnz);CHKERRQ(ierr); 5961 5962 /* set values in constraint matrix */ 5963 for (i=0;i<total_primal_vertices;i++) { 5964 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5965 } 5966 total_counts = total_primal_vertices; 5967 for (i=n_vertices;i<total_counts_cc;i++) { 5968 if (!PetscBTLookup(change_basis,i)) { 5969 PetscInt *cols; 5970 5971 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5972 cols = constraints_idxs+constraints_idxs_ptr[i]; 5973 for (k=0;k<constraints_n[i];k++) { 5974 PetscInt row = total_counts+k; 5975 PetscScalar *vals; 5976 5977 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5978 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5979 } 5980 total_counts += constraints_n[i]; 5981 } 5982 } 5983 /* assembling */ 5984 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5985 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5986 5987 /* 5988 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5989 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5990 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5991 */ 5992 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5993 if (pcbddc->use_change_of_basis) { 5994 /* dual and primal dofs on a single cc */ 5995 PetscInt dual_dofs,primal_dofs; 5996 /* working stuff for GEQRF */ 5997 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5998 PetscBLASInt lqr_work; 5999 /* working stuff for UNGQR */ 6000 PetscScalar *gqr_work,lgqr_work_t; 6001 PetscBLASInt lgqr_work; 6002 /* working stuff for TRTRS */ 6003 PetscScalar *trs_rhs; 6004 PetscBLASInt Blas_NRHS; 6005 /* pointers for values insertion into change of basis matrix */ 6006 PetscInt *start_rows,*start_cols; 6007 PetscScalar *start_vals; 6008 /* working stuff for values insertion */ 6009 PetscBT is_primal; 6010 PetscInt *aux_primal_numbering_B; 6011 /* matrix sizes */ 6012 PetscInt global_size,local_size; 6013 /* temporary change of basis */ 6014 Mat localChangeOfBasisMatrix; 6015 /* extra space for debugging */ 6016 PetscScalar *dbg_work; 6017 6018 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6019 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6020 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6021 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6022 /* nonzeros for local mat */ 6023 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6024 if (!pcbddc->benign_change || pcbddc->fake_change) { 6025 for (i=0;i<pcis->n;i++) nnz[i]=1; 6026 } else { 6027 const PetscInt *ii; 6028 PetscInt n; 6029 PetscBool flg_row; 6030 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6031 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6032 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6033 } 6034 for (i=n_vertices;i<total_counts_cc;i++) { 6035 if (PetscBTLookup(change_basis,i)) { 6036 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6037 if (PetscBTLookup(qr_needed_idx,i)) { 6038 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6039 } else { 6040 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6041 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6042 } 6043 } 6044 } 6045 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6046 ierr = PetscFree(nnz);CHKERRQ(ierr); 6047 /* Set interior change in the matrix */ 6048 if (!pcbddc->benign_change || pcbddc->fake_change) { 6049 for (i=0;i<pcis->n;i++) { 6050 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6051 } 6052 } else { 6053 const PetscInt *ii,*jj; 6054 PetscScalar *aa; 6055 PetscInt n; 6056 PetscBool flg_row; 6057 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6058 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6059 for (i=0;i<n;i++) { 6060 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6061 } 6062 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6063 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6064 } 6065 6066 if (pcbddc->dbg_flag) { 6067 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6068 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6069 } 6070 6071 6072 /* Now we loop on the constraints which need a change of basis */ 6073 /* 6074 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6075 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6076 6077 Basic blocks of change of basis matrix T computed by 6078 6079 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6080 6081 | 1 0 ... 0 s_1/S | 6082 | 0 1 ... 0 s_2/S | 6083 | ... | 6084 | 0 ... 1 s_{n-1}/S | 6085 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6086 6087 with S = \sum_{i=1}^n s_i^2 6088 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6089 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6090 6091 - QR decomposition of constraints otherwise 6092 */ 6093 if (qr_needed) { 6094 /* space to store Q */ 6095 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6096 /* array to store scaling factors for reflectors */ 6097 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6098 /* first we issue queries for optimal work */ 6099 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6100 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6101 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6102 lqr_work = -1; 6103 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6104 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6105 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6106 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6107 lgqr_work = -1; 6108 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6109 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6110 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6111 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6112 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6113 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6114 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 6115 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6116 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6117 /* array to store rhs and solution of triangular solver */ 6118 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6119 /* allocating workspace for check */ 6120 if (pcbddc->dbg_flag) { 6121 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6122 } 6123 } 6124 /* array to store whether a node is primal or not */ 6125 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6126 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6127 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6128 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6129 for (i=0;i<total_primal_vertices;i++) { 6130 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6131 } 6132 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6133 6134 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6135 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6136 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6137 if (PetscBTLookup(change_basis,total_counts)) { 6138 /* get constraint info */ 6139 primal_dofs = constraints_n[total_counts]; 6140 dual_dofs = size_of_constraint-primal_dofs; 6141 6142 if (pcbddc->dbg_flag) { 6143 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6144 } 6145 6146 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6147 6148 /* copy quadrature constraints for change of basis check */ 6149 if (pcbddc->dbg_flag) { 6150 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6151 } 6152 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6153 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6154 6155 /* compute QR decomposition of constraints */ 6156 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6157 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6158 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6159 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6160 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6161 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6162 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6163 6164 /* explictly compute R^-T */ 6165 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6166 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6167 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6168 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6170 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6171 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6172 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6173 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6174 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6175 6176 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6177 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6178 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6179 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6180 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6181 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6182 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6183 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6184 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6185 6186 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6187 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6188 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6189 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6190 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6191 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6192 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6193 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6194 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6195 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6196 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6197 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6198 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6199 6200 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6201 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6202 /* insert cols for primal dofs */ 6203 for (j=0;j<primal_dofs;j++) { 6204 start_vals = &qr_basis[j*size_of_constraint]; 6205 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6206 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6207 } 6208 /* insert cols for dual dofs */ 6209 for (j=0,k=0;j<dual_dofs;k++) { 6210 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6211 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6212 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6213 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6214 j++; 6215 } 6216 } 6217 6218 /* check change of basis */ 6219 if (pcbddc->dbg_flag) { 6220 PetscInt ii,jj; 6221 PetscBool valid_qr=PETSC_TRUE; 6222 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6223 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6224 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6225 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6226 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6227 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6228 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6229 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6230 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6231 for (jj=0;jj<size_of_constraint;jj++) { 6232 for (ii=0;ii<primal_dofs;ii++) { 6233 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6234 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6235 } 6236 } 6237 if (!valid_qr) { 6238 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6239 for (jj=0;jj<size_of_constraint;jj++) { 6240 for (ii=0;ii<primal_dofs;ii++) { 6241 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6242 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6243 } 6244 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6245 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6246 } 6247 } 6248 } 6249 } else { 6250 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6251 } 6252 } 6253 } else { /* simple transformation block */ 6254 PetscInt row,col; 6255 PetscScalar val,norm; 6256 6257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6258 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6259 for (j=0;j<size_of_constraint;j++) { 6260 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6261 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6262 if (!PetscBTLookup(is_primal,row_B)) { 6263 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6264 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6265 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6266 } else { 6267 for (k=0;k<size_of_constraint;k++) { 6268 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6269 if (row != col) { 6270 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6271 } else { 6272 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6273 } 6274 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6275 } 6276 } 6277 } 6278 if (pcbddc->dbg_flag) { 6279 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6280 } 6281 } 6282 } else { 6283 if (pcbddc->dbg_flag) { 6284 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6285 } 6286 } 6287 } 6288 6289 /* free workspace */ 6290 if (qr_needed) { 6291 if (pcbddc->dbg_flag) { 6292 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6293 } 6294 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6295 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6296 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6297 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6298 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6299 } 6300 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6301 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6302 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6303 6304 /* assembling of global change of variable */ 6305 if (!pcbddc->fake_change) { 6306 Mat tmat; 6307 PetscInt bs; 6308 6309 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6310 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6311 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6312 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6313 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6314 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6315 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6316 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6317 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6318 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6319 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6320 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6321 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6322 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6323 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6324 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6325 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6326 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6327 6328 /* check */ 6329 if (pcbddc->dbg_flag) { 6330 PetscReal error; 6331 Vec x,x_change; 6332 6333 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6334 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6335 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6336 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6337 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6338 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6339 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6340 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6341 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6342 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6343 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6344 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6345 if (error > PETSC_SMALL) { 6346 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6347 } 6348 ierr = VecDestroy(&x);CHKERRQ(ierr); 6349 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6350 } 6351 /* adapt sub_schurs computed (if any) */ 6352 if (pcbddc->use_deluxe_scaling) { 6353 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6354 6355 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6356 if (sub_schurs && sub_schurs->S_Ej_all) { 6357 Mat S_new,tmat; 6358 IS is_all_N,is_V_Sall = NULL; 6359 6360 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6361 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6362 if (pcbddc->deluxe_zerorows) { 6363 ISLocalToGlobalMapping NtoSall; 6364 IS is_V; 6365 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6366 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6367 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6368 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6369 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6370 } 6371 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6372 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6373 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6374 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6375 if (pcbddc->deluxe_zerorows) { 6376 const PetscScalar *array; 6377 const PetscInt *idxs_V,*idxs_all; 6378 PetscInt i,n_V; 6379 6380 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6381 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6382 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6383 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6384 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6385 for (i=0;i<n_V;i++) { 6386 PetscScalar val; 6387 PetscInt idx; 6388 6389 idx = idxs_V[i]; 6390 val = array[idxs_all[idxs_V[i]]]; 6391 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6392 } 6393 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6394 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6395 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6396 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6397 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6398 } 6399 sub_schurs->S_Ej_all = S_new; 6400 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6401 if (sub_schurs->sum_S_Ej_all) { 6402 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6403 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6404 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6405 if (pcbddc->deluxe_zerorows) { 6406 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6407 } 6408 sub_schurs->sum_S_Ej_all = S_new; 6409 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6410 } 6411 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6412 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6413 } 6414 /* destroy any change of basis context in sub_schurs */ 6415 if (sub_schurs && sub_schurs->change) { 6416 PetscInt i; 6417 6418 for (i=0;i<sub_schurs->n_subs;i++) { 6419 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6420 } 6421 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6422 } 6423 } 6424 if (pcbddc->switch_static) { /* need to save the local change */ 6425 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6426 } else { 6427 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6428 } 6429 /* determine if any process has changed the pressures locally */ 6430 pcbddc->change_interior = pcbddc->benign_have_null; 6431 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6432 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6433 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6434 pcbddc->use_qr_single = qr_needed; 6435 } 6436 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6437 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6438 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6439 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6440 } else { 6441 Mat benign_global = NULL; 6442 if (pcbddc->benign_have_null) { 6443 Mat tmat; 6444 6445 pcbddc->change_interior = PETSC_TRUE; 6446 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6447 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6448 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6449 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6450 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6451 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6452 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6453 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6454 if (pcbddc->benign_change) { 6455 Mat M; 6456 6457 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6458 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6459 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6460 ierr = MatDestroy(&M);CHKERRQ(ierr); 6461 } else { 6462 Mat eye; 6463 PetscScalar *array; 6464 6465 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6466 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6467 for (i=0;i<pcis->n;i++) { 6468 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6469 } 6470 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6471 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6472 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6473 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6474 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6475 } 6476 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6477 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6478 } 6479 if (pcbddc->user_ChangeOfBasisMatrix) { 6480 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6481 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6482 } else if (pcbddc->benign_have_null) { 6483 pcbddc->ChangeOfBasisMatrix = benign_global; 6484 } 6485 } 6486 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6487 IS is_global; 6488 const PetscInt *gidxs; 6489 6490 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6491 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6492 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6493 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6494 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6495 } 6496 } 6497 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6498 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6499 } 6500 6501 if (!pcbddc->fake_change) { 6502 /* add pressure dofs to set of primal nodes for numbering purposes */ 6503 for (i=0;i<pcbddc->benign_n;i++) { 6504 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6505 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6506 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6507 pcbddc->local_primal_size_cc++; 6508 pcbddc->local_primal_size++; 6509 } 6510 6511 /* check if a new primal space has been introduced (also take into account benign trick) */ 6512 pcbddc->new_primal_space_local = PETSC_TRUE; 6513 if (olocal_primal_size == pcbddc->local_primal_size) { 6514 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6515 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6516 if (!pcbddc->new_primal_space_local) { 6517 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6518 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6519 } 6520 } 6521 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6522 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6523 } 6524 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6525 6526 /* flush dbg viewer */ 6527 if (pcbddc->dbg_flag) { 6528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6529 } 6530 6531 /* free workspace */ 6532 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6533 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6534 if (!pcbddc->adaptive_selection) { 6535 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6536 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6537 } else { 6538 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6539 pcbddc->adaptive_constraints_idxs_ptr, 6540 pcbddc->adaptive_constraints_data_ptr, 6541 pcbddc->adaptive_constraints_idxs, 6542 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6543 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6544 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6545 } 6546 PetscFunctionReturn(0); 6547 } 6548 6549 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6550 { 6551 ISLocalToGlobalMapping map; 6552 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6553 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6554 PetscInt i,N; 6555 PetscBool rcsr = PETSC_FALSE; 6556 PetscErrorCode ierr; 6557 6558 PetscFunctionBegin; 6559 if (pcbddc->recompute_topography) { 6560 pcbddc->graphanalyzed = PETSC_FALSE; 6561 /* Reset previously computed graph */ 6562 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6563 /* Init local Graph struct */ 6564 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6565 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6566 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6567 6568 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6569 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6570 } 6571 /* Check validity of the csr graph passed in by the user */ 6572 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6573 6574 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6575 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6576 PetscInt *xadj,*adjncy; 6577 PetscInt nvtxs; 6578 PetscBool flg_row=PETSC_FALSE; 6579 6580 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6581 if (flg_row) { 6582 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6583 pcbddc->computed_rowadj = PETSC_TRUE; 6584 } 6585 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6586 rcsr = PETSC_TRUE; 6587 } 6588 if (pcbddc->dbg_flag) { 6589 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6590 } 6591 6592 /* Setup of Graph */ 6593 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6594 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6595 6596 /* attach info on disconnected subdomains if present */ 6597 if (pcbddc->n_local_subs) { 6598 PetscInt *local_subs; 6599 6600 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6601 for (i=0;i<pcbddc->n_local_subs;i++) { 6602 const PetscInt *idxs; 6603 PetscInt nl,j; 6604 6605 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6606 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6607 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6608 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6609 } 6610 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6611 pcbddc->mat_graph->local_subs = local_subs; 6612 } 6613 } 6614 6615 if (!pcbddc->graphanalyzed) { 6616 /* Graph's connected components analysis */ 6617 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6618 pcbddc->graphanalyzed = PETSC_TRUE; 6619 } 6620 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6621 PetscFunctionReturn(0); 6622 } 6623 6624 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6625 { 6626 PetscInt i,j; 6627 PetscScalar *alphas; 6628 PetscErrorCode ierr; 6629 6630 PetscFunctionBegin; 6631 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6632 for (i=0;i<n;i++) { 6633 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6634 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6635 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6636 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6637 } 6638 ierr = PetscFree(alphas);CHKERRQ(ierr); 6639 PetscFunctionReturn(0); 6640 } 6641 6642 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6643 { 6644 Mat A; 6645 PetscInt n_neighs,*neighs,*n_shared,**shared; 6646 PetscMPIInt size,rank,color; 6647 PetscInt *xadj,*adjncy; 6648 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6649 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6650 PetscInt void_procs,*procs_candidates = NULL; 6651 PetscInt xadj_count,*count; 6652 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6653 PetscSubcomm psubcomm; 6654 MPI_Comm subcomm; 6655 PetscErrorCode ierr; 6656 6657 PetscFunctionBegin; 6658 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6659 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6660 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6661 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6662 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6663 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6664 6665 if (have_void) *have_void = PETSC_FALSE; 6666 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6667 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6668 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6669 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6670 im_active = !!n; 6671 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6672 void_procs = size - active_procs; 6673 /* get ranks of of non-active processes in mat communicator */ 6674 if (void_procs) { 6675 PetscInt ncand; 6676 6677 if (have_void) *have_void = PETSC_TRUE; 6678 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6679 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6680 for (i=0,ncand=0;i<size;i++) { 6681 if (!procs_candidates[i]) { 6682 procs_candidates[ncand++] = i; 6683 } 6684 } 6685 /* force n_subdomains to be not greater that the number of non-active processes */ 6686 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6687 } 6688 6689 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6690 number of subdomains requested 1 -> send to master or first candidate in voids */ 6691 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6692 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6693 PetscInt issize,isidx,dest; 6694 if (*n_subdomains == 1) dest = 0; 6695 else dest = rank; 6696 if (im_active) { 6697 issize = 1; 6698 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6699 isidx = procs_candidates[dest]; 6700 } else { 6701 isidx = dest; 6702 } 6703 } else { 6704 issize = 0; 6705 isidx = -1; 6706 } 6707 if (*n_subdomains != 1) *n_subdomains = active_procs; 6708 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6709 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6710 PetscFunctionReturn(0); 6711 } 6712 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6713 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6714 threshold = PetscMax(threshold,2); 6715 6716 /* Get info on mapping */ 6717 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6718 6719 /* build local CSR graph of subdomains' connectivity */ 6720 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6721 xadj[0] = 0; 6722 xadj[1] = PetscMax(n_neighs-1,0); 6723 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6724 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6725 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6726 for (i=1;i<n_neighs;i++) 6727 for (j=0;j<n_shared[i];j++) 6728 count[shared[i][j]] += 1; 6729 6730 xadj_count = 0; 6731 for (i=1;i<n_neighs;i++) { 6732 for (j=0;j<n_shared[i];j++) { 6733 if (count[shared[i][j]] < threshold) { 6734 adjncy[xadj_count] = neighs[i]; 6735 adjncy_wgt[xadj_count] = n_shared[i]; 6736 xadj_count++; 6737 break; 6738 } 6739 } 6740 } 6741 xadj[1] = xadj_count; 6742 ierr = PetscFree(count);CHKERRQ(ierr); 6743 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6744 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6745 6746 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6747 6748 /* Restrict work on active processes only */ 6749 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6750 if (void_procs) { 6751 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6752 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6753 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6754 subcomm = PetscSubcommChild(psubcomm); 6755 } else { 6756 psubcomm = NULL; 6757 subcomm = PetscObjectComm((PetscObject)mat); 6758 } 6759 6760 v_wgt = NULL; 6761 if (!color) { 6762 ierr = PetscFree(xadj);CHKERRQ(ierr); 6763 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6764 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6765 } else { 6766 Mat subdomain_adj; 6767 IS new_ranks,new_ranks_contig; 6768 MatPartitioning partitioner; 6769 PetscInt rstart=0,rend=0; 6770 PetscInt *is_indices,*oldranks; 6771 PetscMPIInt size; 6772 PetscBool aggregate; 6773 6774 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6775 if (void_procs) { 6776 PetscInt prank = rank; 6777 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6778 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6779 for (i=0;i<xadj[1];i++) { 6780 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6781 } 6782 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6783 } else { 6784 oldranks = NULL; 6785 } 6786 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6787 if (aggregate) { /* TODO: all this part could be made more efficient */ 6788 PetscInt lrows,row,ncols,*cols; 6789 PetscMPIInt nrank; 6790 PetscScalar *vals; 6791 6792 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6793 lrows = 0; 6794 if (nrank<redprocs) { 6795 lrows = size/redprocs; 6796 if (nrank<size%redprocs) lrows++; 6797 } 6798 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6799 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6800 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6801 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6802 row = nrank; 6803 ncols = xadj[1]-xadj[0]; 6804 cols = adjncy; 6805 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6806 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6807 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6808 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6809 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6810 ierr = PetscFree(xadj);CHKERRQ(ierr); 6811 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6812 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6813 ierr = PetscFree(vals);CHKERRQ(ierr); 6814 if (use_vwgt) { 6815 Vec v; 6816 const PetscScalar *array; 6817 PetscInt nl; 6818 6819 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6820 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6821 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6822 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6823 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6824 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6825 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6826 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6827 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6828 ierr = VecDestroy(&v);CHKERRQ(ierr); 6829 } 6830 } else { 6831 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6832 if (use_vwgt) { 6833 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6834 v_wgt[0] = n; 6835 } 6836 } 6837 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6838 6839 /* Partition */ 6840 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6841 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6842 if (v_wgt) { 6843 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6844 } 6845 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6846 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6847 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6848 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6849 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6850 6851 /* renumber new_ranks to avoid "holes" in new set of processors */ 6852 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6853 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6854 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6855 if (!aggregate) { 6856 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6857 #if defined(PETSC_USE_DEBUG) 6858 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6859 #endif 6860 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6861 } else if (oldranks) { 6862 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6863 } else { 6864 ranks_send_to_idx[0] = is_indices[0]; 6865 } 6866 } else { 6867 PetscInt idxs[1]; 6868 PetscMPIInt tag; 6869 MPI_Request *reqs; 6870 6871 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6872 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6873 for (i=rstart;i<rend;i++) { 6874 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6875 } 6876 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6877 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6878 ierr = PetscFree(reqs);CHKERRQ(ierr); 6879 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6880 #if defined(PETSC_USE_DEBUG) 6881 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6882 #endif 6883 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6884 } else if (oldranks) { 6885 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6886 } else { 6887 ranks_send_to_idx[0] = idxs[0]; 6888 } 6889 } 6890 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6891 /* clean up */ 6892 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6893 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6894 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6895 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6896 } 6897 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6898 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6899 6900 /* assemble parallel IS for sends */ 6901 i = 1; 6902 if (!color) i=0; 6903 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6904 PetscFunctionReturn(0); 6905 } 6906 6907 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6908 6909 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6910 { 6911 Mat local_mat; 6912 IS is_sends_internal; 6913 PetscInt rows,cols,new_local_rows; 6914 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6915 PetscBool ismatis,isdense,newisdense,destroy_mat; 6916 ISLocalToGlobalMapping l2gmap; 6917 PetscInt* l2gmap_indices; 6918 const PetscInt* is_indices; 6919 MatType new_local_type; 6920 /* buffers */ 6921 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6922 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6923 PetscInt *recv_buffer_idxs_local; 6924 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6925 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6926 /* MPI */ 6927 MPI_Comm comm,comm_n; 6928 PetscSubcomm subcomm; 6929 PetscMPIInt n_sends,n_recvs,commsize; 6930 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6931 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6932 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6933 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6934 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6935 PetscErrorCode ierr; 6936 6937 PetscFunctionBegin; 6938 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6939 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6940 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6941 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6942 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6943 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6944 PetscValidLogicalCollectiveBool(mat,reuse,6); 6945 PetscValidLogicalCollectiveInt(mat,nis,8); 6946 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6947 if (nvecs) { 6948 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6949 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6950 } 6951 /* further checks */ 6952 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6953 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6954 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6955 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6956 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6957 if (reuse && *mat_n) { 6958 PetscInt mrows,mcols,mnrows,mncols; 6959 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6960 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6961 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6962 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6963 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6964 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6965 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6966 } 6967 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6968 PetscValidLogicalCollectiveInt(mat,bs,0); 6969 6970 /* prepare IS for sending if not provided */ 6971 if (!is_sends) { 6972 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6973 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6974 } else { 6975 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6976 is_sends_internal = is_sends; 6977 } 6978 6979 /* get comm */ 6980 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6981 6982 /* compute number of sends */ 6983 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6984 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6985 6986 /* compute number of receives */ 6987 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6988 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6989 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6990 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6991 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6992 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6993 ierr = PetscFree(iflags);CHKERRQ(ierr); 6994 6995 /* restrict comm if requested */ 6996 subcomm = 0; 6997 destroy_mat = PETSC_FALSE; 6998 if (restrict_comm) { 6999 PetscMPIInt color,subcommsize; 7000 7001 color = 0; 7002 if (restrict_full) { 7003 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7004 } else { 7005 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7006 } 7007 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7008 subcommsize = commsize - subcommsize; 7009 /* check if reuse has been requested */ 7010 if (reuse) { 7011 if (*mat_n) { 7012 PetscMPIInt subcommsize2; 7013 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7014 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7015 comm_n = PetscObjectComm((PetscObject)*mat_n); 7016 } else { 7017 comm_n = PETSC_COMM_SELF; 7018 } 7019 } else { /* MAT_INITIAL_MATRIX */ 7020 PetscMPIInt rank; 7021 7022 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7023 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7024 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7025 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7026 comm_n = PetscSubcommChild(subcomm); 7027 } 7028 /* flag to destroy *mat_n if not significative */ 7029 if (color) destroy_mat = PETSC_TRUE; 7030 } else { 7031 comm_n = comm; 7032 } 7033 7034 /* prepare send/receive buffers */ 7035 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7036 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7037 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7038 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7039 if (nis) { 7040 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7041 } 7042 7043 /* Get data from local matrices */ 7044 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7045 /* TODO: See below some guidelines on how to prepare the local buffers */ 7046 /* 7047 send_buffer_vals should contain the raw values of the local matrix 7048 send_buffer_idxs should contain: 7049 - MatType_PRIVATE type 7050 - PetscInt size_of_l2gmap 7051 - PetscInt global_row_indices[size_of_l2gmap] 7052 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7053 */ 7054 else { 7055 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7056 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7057 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7058 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7059 send_buffer_idxs[1] = i; 7060 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7061 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7062 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7063 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7064 for (i=0;i<n_sends;i++) { 7065 ilengths_vals[is_indices[i]] = len*len; 7066 ilengths_idxs[is_indices[i]] = len+2; 7067 } 7068 } 7069 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7070 /* additional is (if any) */ 7071 if (nis) { 7072 PetscMPIInt psum; 7073 PetscInt j; 7074 for (j=0,psum=0;j<nis;j++) { 7075 PetscInt plen; 7076 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7077 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7078 psum += len+1; /* indices + lenght */ 7079 } 7080 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7081 for (j=0,psum=0;j<nis;j++) { 7082 PetscInt plen; 7083 const PetscInt *is_array_idxs; 7084 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7085 send_buffer_idxs_is[psum] = plen; 7086 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7087 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7088 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7089 psum += plen+1; /* indices + lenght */ 7090 } 7091 for (i=0;i<n_sends;i++) { 7092 ilengths_idxs_is[is_indices[i]] = psum; 7093 } 7094 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7095 } 7096 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7097 7098 buf_size_idxs = 0; 7099 buf_size_vals = 0; 7100 buf_size_idxs_is = 0; 7101 buf_size_vecs = 0; 7102 for (i=0;i<n_recvs;i++) { 7103 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7104 buf_size_vals += (PetscInt)olengths_vals[i]; 7105 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7106 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7107 } 7108 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7109 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7110 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7111 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7112 7113 /* get new tags for clean communications */ 7114 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7115 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7116 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7117 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7118 7119 /* allocate for requests */ 7120 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7121 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7122 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7123 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7124 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7125 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7126 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7127 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7128 7129 /* communications */ 7130 ptr_idxs = recv_buffer_idxs; 7131 ptr_vals = recv_buffer_vals; 7132 ptr_idxs_is = recv_buffer_idxs_is; 7133 ptr_vecs = recv_buffer_vecs; 7134 for (i=0;i<n_recvs;i++) { 7135 source_dest = onodes[i]; 7136 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7137 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7138 ptr_idxs += olengths_idxs[i]; 7139 ptr_vals += olengths_vals[i]; 7140 if (nis) { 7141 source_dest = onodes_is[i]; 7142 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7143 ptr_idxs_is += olengths_idxs_is[i]; 7144 } 7145 if (nvecs) { 7146 source_dest = onodes[i]; 7147 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7148 ptr_vecs += olengths_idxs[i]-2; 7149 } 7150 } 7151 for (i=0;i<n_sends;i++) { 7152 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7153 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7154 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7155 if (nis) { 7156 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7157 } 7158 if (nvecs) { 7159 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7160 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7161 } 7162 } 7163 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7164 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7165 7166 /* assemble new l2g map */ 7167 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7168 ptr_idxs = recv_buffer_idxs; 7169 new_local_rows = 0; 7170 for (i=0;i<n_recvs;i++) { 7171 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7172 ptr_idxs += olengths_idxs[i]; 7173 } 7174 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7175 ptr_idxs = recv_buffer_idxs; 7176 new_local_rows = 0; 7177 for (i=0;i<n_recvs;i++) { 7178 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7179 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7180 ptr_idxs += olengths_idxs[i]; 7181 } 7182 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7183 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7184 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7185 7186 /* infer new local matrix type from received local matrices type */ 7187 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7188 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7189 if (n_recvs) { 7190 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7191 ptr_idxs = recv_buffer_idxs; 7192 for (i=0;i<n_recvs;i++) { 7193 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7194 new_local_type_private = MATAIJ_PRIVATE; 7195 break; 7196 } 7197 ptr_idxs += olengths_idxs[i]; 7198 } 7199 switch (new_local_type_private) { 7200 case MATDENSE_PRIVATE: 7201 new_local_type = MATSEQAIJ; 7202 bs = 1; 7203 break; 7204 case MATAIJ_PRIVATE: 7205 new_local_type = MATSEQAIJ; 7206 bs = 1; 7207 break; 7208 case MATBAIJ_PRIVATE: 7209 new_local_type = MATSEQBAIJ; 7210 break; 7211 case MATSBAIJ_PRIVATE: 7212 new_local_type = MATSEQSBAIJ; 7213 break; 7214 default: 7215 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7216 break; 7217 } 7218 } else { /* by default, new_local_type is seqaij */ 7219 new_local_type = MATSEQAIJ; 7220 bs = 1; 7221 } 7222 7223 /* create MATIS object if needed */ 7224 if (!reuse) { 7225 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7226 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7227 } else { 7228 /* it also destroys the local matrices */ 7229 if (*mat_n) { 7230 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7231 } else { /* this is a fake object */ 7232 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7233 } 7234 } 7235 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7236 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7237 7238 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7239 7240 /* Global to local map of received indices */ 7241 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7242 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7243 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7244 7245 /* restore attributes -> type of incoming data and its size */ 7246 buf_size_idxs = 0; 7247 for (i=0;i<n_recvs;i++) { 7248 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7249 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7250 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7251 } 7252 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7253 7254 /* set preallocation */ 7255 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7256 if (!newisdense) { 7257 PetscInt *new_local_nnz=0; 7258 7259 ptr_idxs = recv_buffer_idxs_local; 7260 if (n_recvs) { 7261 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7262 } 7263 for (i=0;i<n_recvs;i++) { 7264 PetscInt j; 7265 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7266 for (j=0;j<*(ptr_idxs+1);j++) { 7267 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7268 } 7269 } else { 7270 /* TODO */ 7271 } 7272 ptr_idxs += olengths_idxs[i]; 7273 } 7274 if (new_local_nnz) { 7275 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7276 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7277 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7278 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7279 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7280 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7281 } else { 7282 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7283 } 7284 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7285 } else { 7286 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7287 } 7288 7289 /* set values */ 7290 ptr_vals = recv_buffer_vals; 7291 ptr_idxs = recv_buffer_idxs_local; 7292 for (i=0;i<n_recvs;i++) { 7293 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7294 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7295 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7296 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7297 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7298 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7299 } else { 7300 /* TODO */ 7301 } 7302 ptr_idxs += olengths_idxs[i]; 7303 ptr_vals += olengths_vals[i]; 7304 } 7305 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7306 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7307 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7308 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7309 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7310 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7311 7312 #if 0 7313 if (!restrict_comm) { /* check */ 7314 Vec lvec,rvec; 7315 PetscReal infty_error; 7316 7317 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7318 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7319 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7320 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7321 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7322 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7323 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7324 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7325 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7326 } 7327 #endif 7328 7329 /* assemble new additional is (if any) */ 7330 if (nis) { 7331 PetscInt **temp_idxs,*count_is,j,psum; 7332 7333 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7334 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7335 ptr_idxs = recv_buffer_idxs_is; 7336 psum = 0; 7337 for (i=0;i<n_recvs;i++) { 7338 for (j=0;j<nis;j++) { 7339 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7340 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7341 psum += plen; 7342 ptr_idxs += plen+1; /* shift pointer to received data */ 7343 } 7344 } 7345 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7346 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7347 for (i=1;i<nis;i++) { 7348 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7349 } 7350 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7351 ptr_idxs = recv_buffer_idxs_is; 7352 for (i=0;i<n_recvs;i++) { 7353 for (j=0;j<nis;j++) { 7354 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7355 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7356 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7357 ptr_idxs += plen+1; /* shift pointer to received data */ 7358 } 7359 } 7360 for (i=0;i<nis;i++) { 7361 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7362 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7363 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7364 } 7365 ierr = PetscFree(count_is);CHKERRQ(ierr); 7366 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7367 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7368 } 7369 /* free workspace */ 7370 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7371 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7372 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7373 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7374 if (isdense) { 7375 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7376 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7377 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7378 } else { 7379 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7380 } 7381 if (nis) { 7382 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7383 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7384 } 7385 7386 if (nvecs) { 7387 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7388 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7389 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7390 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7391 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7392 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7393 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7394 /* set values */ 7395 ptr_vals = recv_buffer_vecs; 7396 ptr_idxs = recv_buffer_idxs_local; 7397 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7398 for (i=0;i<n_recvs;i++) { 7399 PetscInt j; 7400 for (j=0;j<*(ptr_idxs+1);j++) { 7401 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7402 } 7403 ptr_idxs += olengths_idxs[i]; 7404 ptr_vals += olengths_idxs[i]-2; 7405 } 7406 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7407 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7408 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7409 } 7410 7411 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7412 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7413 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7414 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7415 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7416 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7417 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7418 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7419 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7420 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7421 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7422 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7423 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7424 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7425 ierr = PetscFree(onodes);CHKERRQ(ierr); 7426 if (nis) { 7427 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7428 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7429 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7430 } 7431 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7432 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7433 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7434 for (i=0;i<nis;i++) { 7435 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7436 } 7437 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7438 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7439 } 7440 *mat_n = NULL; 7441 } 7442 PetscFunctionReturn(0); 7443 } 7444 7445 /* temporary hack into ksp private data structure */ 7446 #include <petsc/private/kspimpl.h> 7447 7448 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7449 { 7450 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7451 PC_IS *pcis = (PC_IS*)pc->data; 7452 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7453 Mat coarsedivudotp = NULL; 7454 Mat coarseG,t_coarse_mat_is; 7455 MatNullSpace CoarseNullSpace = NULL; 7456 ISLocalToGlobalMapping coarse_islg; 7457 IS coarse_is,*isarray; 7458 PetscInt i,im_active=-1,active_procs=-1; 7459 PetscInt nis,nisdofs,nisneu,nisvert; 7460 PC pc_temp; 7461 PCType coarse_pc_type; 7462 KSPType coarse_ksp_type; 7463 PetscBool multilevel_requested,multilevel_allowed; 7464 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7465 PetscInt ncoarse,nedcfield; 7466 PetscBool compute_vecs = PETSC_FALSE; 7467 PetscScalar *array; 7468 MatReuse coarse_mat_reuse; 7469 PetscBool restr, full_restr, have_void; 7470 PetscMPIInt commsize; 7471 PetscErrorCode ierr; 7472 7473 PetscFunctionBegin; 7474 /* Assign global numbering to coarse dofs */ 7475 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7476 PetscInt ocoarse_size; 7477 compute_vecs = PETSC_TRUE; 7478 7479 pcbddc->new_primal_space = PETSC_TRUE; 7480 ocoarse_size = pcbddc->coarse_size; 7481 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7482 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7483 /* see if we can avoid some work */ 7484 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7485 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7486 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7487 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7488 coarse_reuse = PETSC_FALSE; 7489 } else { /* we can safely reuse already computed coarse matrix */ 7490 coarse_reuse = PETSC_TRUE; 7491 } 7492 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7493 coarse_reuse = PETSC_FALSE; 7494 } 7495 /* reset any subassembling information */ 7496 if (!coarse_reuse || pcbddc->recompute_topography) { 7497 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7498 } 7499 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7500 coarse_reuse = PETSC_TRUE; 7501 } 7502 /* assemble coarse matrix */ 7503 if (coarse_reuse && pcbddc->coarse_ksp) { 7504 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7505 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7506 coarse_mat_reuse = MAT_REUSE_MATRIX; 7507 } else { 7508 coarse_mat = NULL; 7509 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7510 } 7511 7512 /* creates temporary l2gmap and IS for coarse indexes */ 7513 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7514 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7515 7516 /* creates temporary MATIS object for coarse matrix */ 7517 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7518 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7519 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7520 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7521 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7522 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7523 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7524 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7525 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7526 7527 /* count "active" (i.e. with positive local size) and "void" processes */ 7528 im_active = !!(pcis->n); 7529 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7530 7531 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7532 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7533 /* full_restr : just use the receivers from the subassembling pattern */ 7534 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7535 coarse_mat_is = NULL; 7536 multilevel_allowed = PETSC_FALSE; 7537 multilevel_requested = PETSC_FALSE; 7538 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7539 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7540 if (multilevel_requested) { 7541 ncoarse = active_procs/pcbddc->coarsening_ratio; 7542 restr = PETSC_FALSE; 7543 full_restr = PETSC_FALSE; 7544 } else { 7545 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7546 restr = PETSC_TRUE; 7547 full_restr = PETSC_TRUE; 7548 } 7549 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7550 ncoarse = PetscMax(1,ncoarse); 7551 if (!pcbddc->coarse_subassembling) { 7552 if (pcbddc->coarsening_ratio > 1) { 7553 if (multilevel_requested) { 7554 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7555 } else { 7556 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7557 } 7558 } else { 7559 PetscMPIInt rank; 7560 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7561 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7562 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7563 } 7564 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7565 PetscInt psum; 7566 if (pcbddc->coarse_ksp) psum = 1; 7567 else psum = 0; 7568 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7569 if (ncoarse < commsize) have_void = PETSC_TRUE; 7570 } 7571 /* determine if we can go multilevel */ 7572 if (multilevel_requested) { 7573 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7574 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7575 } 7576 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7577 7578 /* dump subassembling pattern */ 7579 if (pcbddc->dbg_flag && multilevel_allowed) { 7580 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7581 } 7582 7583 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7584 nedcfield = -1; 7585 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7586 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7587 const PetscInt *idxs; 7588 ISLocalToGlobalMapping tmap; 7589 7590 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7591 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7592 /* allocate space for temporary storage */ 7593 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7594 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7595 /* allocate for IS array */ 7596 nisdofs = pcbddc->n_ISForDofsLocal; 7597 if (pcbddc->nedclocal) { 7598 if (pcbddc->nedfield > -1) { 7599 nedcfield = pcbddc->nedfield; 7600 } else { 7601 nedcfield = 0; 7602 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7603 nisdofs = 1; 7604 } 7605 } 7606 nisneu = !!pcbddc->NeumannBoundariesLocal; 7607 nisvert = 0; /* nisvert is not used */ 7608 nis = nisdofs + nisneu + nisvert; 7609 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7610 /* dofs splitting */ 7611 for (i=0;i<nisdofs;i++) { 7612 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7613 if (nedcfield != i) { 7614 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7615 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7616 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7617 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7618 } else { 7619 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7620 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7621 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7622 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7623 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7624 } 7625 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7626 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7627 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7628 } 7629 /* neumann boundaries */ 7630 if (pcbddc->NeumannBoundariesLocal) { 7631 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7632 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7633 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7634 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7635 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7636 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7637 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7638 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7639 } 7640 /* free memory */ 7641 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7642 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7643 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7644 } else { 7645 nis = 0; 7646 nisdofs = 0; 7647 nisneu = 0; 7648 nisvert = 0; 7649 isarray = NULL; 7650 } 7651 /* destroy no longer needed map */ 7652 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7653 7654 /* subassemble */ 7655 if (multilevel_allowed) { 7656 Vec vp[1]; 7657 PetscInt nvecs = 0; 7658 PetscBool reuse,reuser; 7659 7660 if (coarse_mat) reuse = PETSC_TRUE; 7661 else reuse = PETSC_FALSE; 7662 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7663 vp[0] = NULL; 7664 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7665 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7666 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7667 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7668 nvecs = 1; 7669 7670 if (pcbddc->divudotp) { 7671 Mat B,loc_divudotp; 7672 Vec v,p; 7673 IS dummy; 7674 PetscInt np; 7675 7676 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7677 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7678 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7679 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7680 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7681 ierr = VecSet(p,1.);CHKERRQ(ierr); 7682 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7683 ierr = VecDestroy(&p);CHKERRQ(ierr); 7684 ierr = MatDestroy(&B);CHKERRQ(ierr); 7685 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7686 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7687 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7688 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7689 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7690 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7691 ierr = VecDestroy(&v);CHKERRQ(ierr); 7692 } 7693 } 7694 if (reuser) { 7695 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7696 } else { 7697 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7698 } 7699 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7700 PetscScalar *arraym,*arrayv; 7701 PetscInt nl; 7702 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7703 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7704 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7705 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7706 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7707 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7708 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7709 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7710 } else { 7711 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7712 } 7713 } else { 7714 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7715 } 7716 if (coarse_mat_is || coarse_mat) { 7717 PetscMPIInt size; 7718 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7719 if (!multilevel_allowed) { 7720 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7721 } else { 7722 Mat A; 7723 7724 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7725 if (coarse_mat_is) { 7726 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7727 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7728 coarse_mat = coarse_mat_is; 7729 } 7730 /* be sure we don't have MatSeqDENSE as local mat */ 7731 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7732 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7733 } 7734 } 7735 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7736 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7737 7738 /* create local to global scatters for coarse problem */ 7739 if (compute_vecs) { 7740 PetscInt lrows; 7741 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7742 if (coarse_mat) { 7743 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7744 } else { 7745 lrows = 0; 7746 } 7747 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7748 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7749 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7750 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7751 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7752 } 7753 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7754 7755 /* set defaults for coarse KSP and PC */ 7756 if (multilevel_allowed) { 7757 coarse_ksp_type = KSPRICHARDSON; 7758 coarse_pc_type = PCBDDC; 7759 } else { 7760 coarse_ksp_type = KSPPREONLY; 7761 coarse_pc_type = PCREDUNDANT; 7762 } 7763 7764 /* print some info if requested */ 7765 if (pcbddc->dbg_flag) { 7766 if (!multilevel_allowed) { 7767 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7768 if (multilevel_requested) { 7769 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7770 } else if (pcbddc->max_levels) { 7771 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7772 } 7773 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7774 } 7775 } 7776 7777 /* communicate coarse discrete gradient */ 7778 coarseG = NULL; 7779 if (pcbddc->nedcG && multilevel_allowed) { 7780 MPI_Comm ccomm; 7781 if (coarse_mat) { 7782 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7783 } else { 7784 ccomm = MPI_COMM_NULL; 7785 } 7786 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7787 } 7788 7789 /* create the coarse KSP object only once with defaults */ 7790 if (coarse_mat) { 7791 PetscViewer dbg_viewer = NULL; 7792 if (pcbddc->dbg_flag) { 7793 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7794 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7795 } 7796 if (!pcbddc->coarse_ksp) { 7797 char prefix[256],str_level[16]; 7798 size_t len; 7799 7800 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7801 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7802 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7803 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7804 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7805 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7806 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7807 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7808 /* TODO is this logic correct? should check for coarse_mat type */ 7809 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7810 /* prefix */ 7811 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7812 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7813 if (!pcbddc->current_level) { 7814 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7815 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7816 } else { 7817 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7818 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7819 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7820 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7821 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7822 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7823 } 7824 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7825 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7826 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7827 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7828 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7829 /* allow user customization */ 7830 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7831 } 7832 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7833 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7834 if (nisdofs) { 7835 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7836 for (i=0;i<nisdofs;i++) { 7837 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7838 } 7839 } 7840 if (nisneu) { 7841 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7842 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7843 } 7844 if (nisvert) { 7845 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7846 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7847 } 7848 if (coarseG) { 7849 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7850 } 7851 7852 /* get some info after set from options */ 7853 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7854 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7855 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7856 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7857 if (isbddc && !multilevel_allowed) { 7858 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7859 isbddc = PETSC_FALSE; 7860 } 7861 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7862 if (multilevel_requested && !isbddc && !isnn) { 7863 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7864 isbddc = PETSC_TRUE; 7865 isnn = PETSC_FALSE; 7866 } 7867 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7868 if (isredundant) { 7869 KSP inner_ksp; 7870 PC inner_pc; 7871 7872 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7873 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7874 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7875 } 7876 7877 /* parameters which miss an API */ 7878 if (isbddc) { 7879 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7880 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7881 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7882 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7883 if (pcbddc_coarse->benign_saddle_point) { 7884 Mat coarsedivudotp_is; 7885 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7886 IS row,col; 7887 const PetscInt *gidxs; 7888 PetscInt n,st,M,N; 7889 7890 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7891 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7892 st = st-n; 7893 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7894 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7895 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7896 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7897 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7898 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7899 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7900 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7901 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7902 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7903 ierr = ISDestroy(&row);CHKERRQ(ierr); 7904 ierr = ISDestroy(&col);CHKERRQ(ierr); 7905 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7906 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7907 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7908 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7909 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7910 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7911 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7912 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7913 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7914 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7915 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7916 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7917 } 7918 } 7919 7920 /* propagate symmetry info of coarse matrix */ 7921 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7922 if (pc->pmat->symmetric_set) { 7923 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7924 } 7925 if (pc->pmat->hermitian_set) { 7926 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7927 } 7928 if (pc->pmat->spd_set) { 7929 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7930 } 7931 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7932 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7933 } 7934 /* set operators */ 7935 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7936 if (pcbddc->dbg_flag) { 7937 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7938 } 7939 } 7940 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7941 ierr = PetscFree(isarray);CHKERRQ(ierr); 7942 #if 0 7943 { 7944 PetscViewer viewer; 7945 char filename[256]; 7946 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7947 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7948 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7949 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7950 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7951 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7952 } 7953 #endif 7954 7955 if (pcbddc->coarse_ksp) { 7956 Vec crhs,csol; 7957 7958 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7959 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7960 if (!csol) { 7961 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7962 } 7963 if (!crhs) { 7964 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7965 } 7966 } 7967 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7968 7969 /* compute null space for coarse solver if the benign trick has been requested */ 7970 if (pcbddc->benign_null) { 7971 7972 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7973 for (i=0;i<pcbddc->benign_n;i++) { 7974 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7975 } 7976 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7977 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7978 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7979 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7980 if (coarse_mat) { 7981 Vec nullv; 7982 PetscScalar *array,*array2; 7983 PetscInt nl; 7984 7985 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7986 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7987 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7988 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7989 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7990 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7991 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7992 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7993 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7994 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7995 } 7996 } 7997 7998 if (pcbddc->coarse_ksp) { 7999 PetscBool ispreonly; 8000 8001 if (CoarseNullSpace) { 8002 PetscBool isnull; 8003 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8004 if (isnull) { 8005 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8006 } 8007 /* TODO: add local nullspaces (if any) */ 8008 } 8009 /* setup coarse ksp */ 8010 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8011 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8012 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8013 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8014 KSP check_ksp; 8015 KSPType check_ksp_type; 8016 PC check_pc; 8017 Vec check_vec,coarse_vec; 8018 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8019 PetscInt its; 8020 PetscBool compute_eigs; 8021 PetscReal *eigs_r,*eigs_c; 8022 PetscInt neigs; 8023 const char *prefix; 8024 8025 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8026 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8027 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8028 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8029 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8030 /* prevent from setup unneeded object */ 8031 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8032 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8033 if (ispreonly) { 8034 check_ksp_type = KSPPREONLY; 8035 compute_eigs = PETSC_FALSE; 8036 } else { 8037 check_ksp_type = KSPGMRES; 8038 compute_eigs = PETSC_TRUE; 8039 } 8040 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8041 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8042 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8043 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8044 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8045 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8046 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8047 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8048 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8049 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8050 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8051 /* create random vec */ 8052 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8053 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8054 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8055 /* solve coarse problem */ 8056 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8057 /* set eigenvalue estimation if preonly has not been requested */ 8058 if (compute_eigs) { 8059 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8060 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8061 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8062 if (neigs) { 8063 lambda_max = eigs_r[neigs-1]; 8064 lambda_min = eigs_r[0]; 8065 if (pcbddc->use_coarse_estimates) { 8066 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8067 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8068 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8069 } 8070 } 8071 } 8072 } 8073 8074 /* check coarse problem residual error */ 8075 if (pcbddc->dbg_flag) { 8076 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8077 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8078 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8079 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8080 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8081 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8082 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8083 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8084 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8085 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8086 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8087 if (CoarseNullSpace) { 8088 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8089 } 8090 if (compute_eigs) { 8091 PetscReal lambda_max_s,lambda_min_s; 8092 KSPConvergedReason reason; 8093 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8094 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8095 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8096 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8097 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8098 for (i=0;i<neigs;i++) { 8099 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8100 } 8101 } 8102 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8103 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8104 } 8105 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8106 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8107 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8108 if (compute_eigs) { 8109 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8110 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8111 } 8112 } 8113 } 8114 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8115 /* print additional info */ 8116 if (pcbddc->dbg_flag) { 8117 /* waits until all processes reaches this point */ 8118 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8119 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8120 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8121 } 8122 8123 /* free memory */ 8124 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8125 PetscFunctionReturn(0); 8126 } 8127 8128 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8129 { 8130 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8131 PC_IS* pcis = (PC_IS*)pc->data; 8132 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8133 IS subset,subset_mult,subset_n; 8134 PetscInt local_size,coarse_size=0; 8135 PetscInt *local_primal_indices=NULL; 8136 const PetscInt *t_local_primal_indices; 8137 PetscErrorCode ierr; 8138 8139 PetscFunctionBegin; 8140 /* Compute global number of coarse dofs */ 8141 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8142 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8143 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8144 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8145 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8146 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8147 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8148 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8149 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8150 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8151 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8152 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8153 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8154 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8155 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8156 8157 /* check numbering */ 8158 if (pcbddc->dbg_flag) { 8159 PetscScalar coarsesum,*array,*array2; 8160 PetscInt i; 8161 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8162 8163 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8164 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8165 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8166 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8167 /* counter */ 8168 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8169 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8170 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8171 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8172 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8173 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8174 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8175 for (i=0;i<pcbddc->local_primal_size;i++) { 8176 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8177 } 8178 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8179 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8180 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8181 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8182 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8183 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8184 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8185 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8186 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8187 for (i=0;i<pcis->n;i++) { 8188 if (array[i] != 0.0 && array[i] != array2[i]) { 8189 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8190 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8191 set_error = PETSC_TRUE; 8192 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8194 } 8195 } 8196 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8197 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8199 for (i=0;i<pcis->n;i++) { 8200 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8201 } 8202 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8203 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8204 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8205 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8206 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8207 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8208 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8209 PetscInt *gidxs; 8210 8211 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8212 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8213 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8216 for (i=0;i<pcbddc->local_primal_size;i++) { 8217 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8218 } 8219 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8220 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8221 } 8222 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8223 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8224 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8225 } 8226 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8227 /* get back data */ 8228 *coarse_size_n = coarse_size; 8229 *local_primal_indices_n = local_primal_indices; 8230 PetscFunctionReturn(0); 8231 } 8232 8233 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8234 { 8235 IS localis_t; 8236 PetscInt i,lsize,*idxs,n; 8237 PetscScalar *vals; 8238 PetscErrorCode ierr; 8239 8240 PetscFunctionBegin; 8241 /* get indices in local ordering exploiting local to global map */ 8242 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8243 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8244 for (i=0;i<lsize;i++) vals[i] = 1.0; 8245 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8246 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8247 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8248 if (idxs) { /* multilevel guard */ 8249 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8250 } 8251 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8252 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8253 ierr = PetscFree(vals);CHKERRQ(ierr); 8254 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8255 /* now compute set in local ordering */ 8256 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8257 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8258 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8259 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8260 for (i=0,lsize=0;i<n;i++) { 8261 if (PetscRealPart(vals[i]) > 0.5) { 8262 lsize++; 8263 } 8264 } 8265 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8266 for (i=0,lsize=0;i<n;i++) { 8267 if (PetscRealPart(vals[i]) > 0.5) { 8268 idxs[lsize++] = i; 8269 } 8270 } 8271 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8272 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8273 *localis = localis_t; 8274 PetscFunctionReturn(0); 8275 } 8276 8277 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8278 { 8279 PC_IS *pcis=(PC_IS*)pc->data; 8280 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8281 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8282 Mat S_j; 8283 PetscInt *used_xadj,*used_adjncy; 8284 PetscBool free_used_adj; 8285 PetscErrorCode ierr; 8286 8287 PetscFunctionBegin; 8288 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8289 free_used_adj = PETSC_FALSE; 8290 if (pcbddc->sub_schurs_layers == -1) { 8291 used_xadj = NULL; 8292 used_adjncy = NULL; 8293 } else { 8294 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8295 used_xadj = pcbddc->mat_graph->xadj; 8296 used_adjncy = pcbddc->mat_graph->adjncy; 8297 } else if (pcbddc->computed_rowadj) { 8298 used_xadj = pcbddc->mat_graph->xadj; 8299 used_adjncy = pcbddc->mat_graph->adjncy; 8300 } else { 8301 PetscBool flg_row=PETSC_FALSE; 8302 const PetscInt *xadj,*adjncy; 8303 PetscInt nvtxs; 8304 8305 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8306 if (flg_row) { 8307 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8308 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8309 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8310 free_used_adj = PETSC_TRUE; 8311 } else { 8312 pcbddc->sub_schurs_layers = -1; 8313 used_xadj = NULL; 8314 used_adjncy = NULL; 8315 } 8316 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8317 } 8318 } 8319 8320 /* setup sub_schurs data */ 8321 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8322 if (!sub_schurs->schur_explicit) { 8323 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8324 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8325 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8326 } else { 8327 Mat change = NULL; 8328 Vec scaling = NULL; 8329 IS change_primal = NULL, iP; 8330 PetscInt benign_n; 8331 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8332 PetscBool isseqaij,need_change = PETSC_FALSE; 8333 PetscBool discrete_harmonic = PETSC_FALSE; 8334 8335 if (!pcbddc->use_vertices && reuse_solvers) { 8336 PetscInt n_vertices; 8337 8338 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8339 reuse_solvers = (PetscBool)!n_vertices; 8340 } 8341 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8342 if (!isseqaij) { 8343 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8344 if (matis->A == pcbddc->local_mat) { 8345 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8346 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8347 } else { 8348 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8349 } 8350 } 8351 if (!pcbddc->benign_change_explicit) { 8352 benign_n = pcbddc->benign_n; 8353 } else { 8354 benign_n = 0; 8355 } 8356 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8357 We need a global reduction to avoid possible deadlocks. 8358 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8359 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8360 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8361 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8362 need_change = (PetscBool)(!need_change); 8363 } 8364 /* If the user defines additional constraints, we import them here. 8365 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8366 if (need_change) { 8367 PC_IS *pcisf; 8368 PC_BDDC *pcbddcf; 8369 PC pcf; 8370 8371 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8372 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8373 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8374 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8375 8376 /* hacks */ 8377 pcisf = (PC_IS*)pcf->data; 8378 pcisf->is_B_local = pcis->is_B_local; 8379 pcisf->vec1_N = pcis->vec1_N; 8380 pcisf->BtoNmap = pcis->BtoNmap; 8381 pcisf->n = pcis->n; 8382 pcisf->n_B = pcis->n_B; 8383 pcbddcf = (PC_BDDC*)pcf->data; 8384 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8385 pcbddcf->mat_graph = pcbddc->mat_graph; 8386 pcbddcf->use_faces = PETSC_TRUE; 8387 pcbddcf->use_change_of_basis = PETSC_TRUE; 8388 pcbddcf->use_change_on_faces = PETSC_TRUE; 8389 pcbddcf->use_qr_single = PETSC_TRUE; 8390 pcbddcf->fake_change = PETSC_TRUE; 8391 8392 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8393 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8394 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8395 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8396 change = pcbddcf->ConstraintMatrix; 8397 pcbddcf->ConstraintMatrix = NULL; 8398 8399 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8400 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8401 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8402 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8403 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8404 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8405 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8406 pcf->ops->destroy = NULL; 8407 pcf->ops->reset = NULL; 8408 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8409 } 8410 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8411 8412 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8413 if (iP) { 8414 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8415 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8416 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8417 } 8418 if (discrete_harmonic) { 8419 Mat A; 8420 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8421 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8422 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8423 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8424 ierr = MatDestroy(&A);CHKERRQ(ierr); 8425 } else { 8426 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8427 } 8428 ierr = MatDestroy(&change);CHKERRQ(ierr); 8429 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8430 } 8431 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8432 8433 /* free adjacency */ 8434 if (free_used_adj) { 8435 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8436 } 8437 PetscFunctionReturn(0); 8438 } 8439 8440 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8441 { 8442 PC_IS *pcis=(PC_IS*)pc->data; 8443 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8444 PCBDDCGraph graph; 8445 PetscErrorCode ierr; 8446 8447 PetscFunctionBegin; 8448 /* attach interface graph for determining subsets */ 8449 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8450 IS verticesIS,verticescomm; 8451 PetscInt vsize,*idxs; 8452 8453 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8454 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8455 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8456 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8457 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8458 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8459 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8460 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8461 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8462 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8463 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8464 } else { 8465 graph = pcbddc->mat_graph; 8466 } 8467 /* print some info */ 8468 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8469 IS vertices; 8470 PetscInt nv,nedges,nfaces; 8471 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8472 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8473 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8474 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8475 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8476 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8477 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8479 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8480 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8481 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8482 } 8483 8484 /* sub_schurs init */ 8485 if (!pcbddc->sub_schurs) { 8486 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8487 } 8488 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8489 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8490 8491 /* free graph struct */ 8492 if (pcbddc->sub_schurs_rebuild) { 8493 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8494 } 8495 PetscFunctionReturn(0); 8496 } 8497 8498 PetscErrorCode PCBDDCCheckOperator(PC pc) 8499 { 8500 PC_IS *pcis=(PC_IS*)pc->data; 8501 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8502 PetscErrorCode ierr; 8503 8504 PetscFunctionBegin; 8505 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8506 IS zerodiag = NULL; 8507 Mat S_j,B0_B=NULL; 8508 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8509 PetscScalar *p0_check,*array,*array2; 8510 PetscReal norm; 8511 PetscInt i; 8512 8513 /* B0 and B0_B */ 8514 if (zerodiag) { 8515 IS dummy; 8516 8517 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8518 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8519 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8520 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8521 } 8522 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8523 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8524 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8525 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8526 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8527 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8528 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8529 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8530 /* S_j */ 8531 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8532 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8533 8534 /* mimic vector in \widetilde{W}_\Gamma */ 8535 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8536 /* continuous in primal space */ 8537 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8538 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8539 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8540 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8541 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8542 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8543 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8544 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8545 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8546 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8547 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8548 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8549 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8550 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8551 8552 /* assemble rhs for coarse problem */ 8553 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8554 /* local with Schur */ 8555 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8556 if (zerodiag) { 8557 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8558 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8559 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8560 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8561 } 8562 /* sum on primal nodes the local contributions */ 8563 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8564 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8565 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8566 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8567 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8568 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8569 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8570 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8571 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8572 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8573 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8574 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8575 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8576 /* scale primal nodes (BDDC sums contibutions) */ 8577 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8578 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8579 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8580 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8581 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8582 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8583 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8584 /* global: \widetilde{B0}_B w_\Gamma */ 8585 if (zerodiag) { 8586 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8587 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8588 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8589 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8590 } 8591 /* BDDC */ 8592 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8593 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8594 8595 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8596 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8597 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8598 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8599 for (i=0;i<pcbddc->benign_n;i++) { 8600 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8601 } 8602 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8603 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8604 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8605 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8606 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8607 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8608 } 8609 PetscFunctionReturn(0); 8610 } 8611 8612 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8613 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8614 { 8615 Mat At; 8616 IS rows; 8617 PetscInt rst,ren; 8618 PetscErrorCode ierr; 8619 PetscLayout rmap; 8620 8621 PetscFunctionBegin; 8622 rst = ren = 0; 8623 if (ccomm != MPI_COMM_NULL) { 8624 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8625 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8626 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8627 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8628 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8629 } 8630 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8631 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8632 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8633 8634 if (ccomm != MPI_COMM_NULL) { 8635 Mat_MPIAIJ *a,*b; 8636 IS from,to; 8637 Vec gvec; 8638 PetscInt lsize; 8639 8640 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8641 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8642 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8643 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8644 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8645 a = (Mat_MPIAIJ*)At->data; 8646 b = (Mat_MPIAIJ*)(*B)->data; 8647 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8648 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8649 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8650 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8651 b->A = a->A; 8652 b->B = a->B; 8653 8654 b->donotstash = a->donotstash; 8655 b->roworiented = a->roworiented; 8656 b->rowindices = 0; 8657 b->rowvalues = 0; 8658 b->getrowactive = PETSC_FALSE; 8659 8660 (*B)->rmap = rmap; 8661 (*B)->factortype = A->factortype; 8662 (*B)->assembled = PETSC_TRUE; 8663 (*B)->insertmode = NOT_SET_VALUES; 8664 (*B)->preallocated = PETSC_TRUE; 8665 8666 if (a->colmap) { 8667 #if defined(PETSC_USE_CTABLE) 8668 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8669 #else 8670 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8671 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8672 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8673 #endif 8674 } else b->colmap = 0; 8675 if (a->garray) { 8676 PetscInt len; 8677 len = a->B->cmap->n; 8678 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8679 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8680 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8681 } else b->garray = 0; 8682 8683 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8684 b->lvec = a->lvec; 8685 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8686 8687 /* cannot use VecScatterCopy */ 8688 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8689 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8690 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8691 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8692 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8693 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8694 ierr = ISDestroy(&from);CHKERRQ(ierr); 8695 ierr = ISDestroy(&to);CHKERRQ(ierr); 8696 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8697 } 8698 ierr = MatDestroy(&At);CHKERRQ(ierr); 8699 PetscFunctionReturn(0); 8700 } 8701