1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 maxsize = 0; 1528 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1529 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1530 /* create vectors to hold quadrature weights */ 1531 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1532 if (!transpose) { 1533 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1534 } else { 1535 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1536 } 1537 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1538 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1539 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1540 for (i=0;i<maxneighs;i++) { 1541 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1542 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1543 } 1544 1545 /* compute local quad vec */ 1546 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1547 if (!transpose) { 1548 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1549 } else { 1550 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1551 } 1552 ierr = VecSet(p,1.);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1557 } 1558 if (vl2l) { 1559 Mat lA; 1560 VecScatter sc; 1561 1562 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1563 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1564 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1565 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1568 } else { 1569 vins = v; 1570 } 1571 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1572 ierr = VecDestroy(&p);CHKERRQ(ierr); 1573 1574 /* insert in global quadrature vecs */ 1575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1576 for (i=0;i<n_neigh;i++) { 1577 const PetscInt *idxs; 1578 PetscInt idx,nn,j; 1579 1580 idxs = shared[i]; 1581 nn = n_shared[i]; 1582 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1583 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1584 idx = -(idx+1); 1585 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1586 } 1587 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1588 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1589 if (vl2l) { 1590 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1591 } 1592 ierr = VecDestroy(&v);CHKERRQ(ierr); 1593 ierr = PetscFree(vals);CHKERRQ(ierr); 1594 1595 /* assemble near null space */ 1596 for (i=0;i<maxneighs;i++) { 1597 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1598 } 1599 for (i=0;i<maxneighs;i++) { 1600 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1601 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1602 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1605 PetscFunctionReturn(0); 1606 } 1607 1608 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1609 { 1610 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1611 PetscErrorCode ierr; 1612 1613 PetscFunctionBegin; 1614 if (primalv) { 1615 if (pcbddc->user_primal_vertices_local) { 1616 IS list[2], newp; 1617 1618 list[0] = primalv; 1619 list[1] = pcbddc->user_primal_vertices_local; 1620 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1621 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1622 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1623 pcbddc->user_primal_vertices_local = newp; 1624 } else { 1625 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1626 } 1627 } 1628 PetscFunctionReturn(0); 1629 } 1630 1631 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1632 { 1633 PetscInt f, *comp = (PetscInt *)ctx; 1634 1635 PetscFunctionBegin; 1636 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1641 { 1642 PetscErrorCode ierr; 1643 Vec local,global; 1644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1645 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1646 PetscBool monolithic = PETSC_FALSE; 1647 1648 PetscFunctionBegin; 1649 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1650 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1651 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1652 /* need to convert from global to local topology information and remove references to information in global ordering */ 1653 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1654 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1655 if (monolithic) { /* just get block size to properly compute vertices */ 1656 if (pcbddc->vertex_size == 1) { 1657 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1658 } 1659 goto boundary; 1660 } 1661 1662 if (pcbddc->user_provided_isfordofs) { 1663 if (pcbddc->n_ISForDofs) { 1664 PetscInt i; 1665 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1666 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1667 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1668 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1669 } 1670 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1671 pcbddc->n_ISForDofs = 0; 1672 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1673 } 1674 } else { 1675 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1676 DM dm; 1677 1678 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1679 if (!dm) { 1680 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1681 } 1682 if (dm) { 1683 IS *fields; 1684 PetscInt nf,i; 1685 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1686 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1687 for (i=0;i<nf;i++) { 1688 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1689 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1690 } 1691 ierr = PetscFree(fields);CHKERRQ(ierr); 1692 pcbddc->n_ISForDofsLocal = nf; 1693 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1694 PetscContainer c; 1695 1696 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1697 if (c) { 1698 MatISLocalFields lf; 1699 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1700 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1701 } else { /* fallback, create the default fields if bs > 1 */ 1702 PetscInt i, n = matis->A->rmap->n; 1703 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1704 if (i > 1) { 1705 pcbddc->n_ISForDofsLocal = i; 1706 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1707 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1708 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1709 } 1710 } 1711 } 1712 } 1713 } else { 1714 PetscInt i; 1715 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1716 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1717 } 1718 } 1719 } 1720 1721 boundary: 1722 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1723 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1724 } else if (pcbddc->DirichletBoundariesLocal) { 1725 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1726 } 1727 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->NeumannBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1734 } 1735 ierr = VecDestroy(&global);CHKERRQ(ierr); 1736 ierr = VecDestroy(&local);CHKERRQ(ierr); 1737 /* detect local disconnected subdomains if requested (use matis->A) */ 1738 if (pcbddc->detect_disconnected) { 1739 IS primalv = NULL; 1740 PetscInt i; 1741 1742 for (i=0;i<pcbddc->n_local_subs;i++) { 1743 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1744 } 1745 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1746 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1747 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1748 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1749 } 1750 /* early stage corner detection */ 1751 { 1752 DM dm; 1753 1754 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1755 if (dm) { 1756 PetscBool isda; 1757 1758 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1759 if (isda) { 1760 ISLocalToGlobalMapping l2l; 1761 IS corners; 1762 Mat lA; 1763 1764 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1765 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1766 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1767 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1768 if (l2l) { 1769 const PetscInt *idx; 1770 PetscInt bs,*idxout,n; 1771 1772 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1773 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1774 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1775 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1776 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1777 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1778 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1779 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1780 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1781 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1782 pcbddc->corner_selected = PETSC_TRUE; 1783 } else { /* not from DMDA */ 1784 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1785 } 1786 } 1787 } 1788 } 1789 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1790 DM dm; 1791 1792 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1793 if (!dm) { 1794 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1795 } 1796 if (dm) { 1797 Vec vcoords; 1798 PetscSection section; 1799 PetscReal *coords; 1800 PetscInt d,cdim,nl,nf,**ctxs; 1801 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1802 1803 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1804 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1805 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1806 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1807 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1808 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1809 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1810 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1811 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1812 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1813 for (d=0;d<cdim;d++) { 1814 PetscInt i; 1815 const PetscScalar *v; 1816 1817 for (i=0;i<nf;i++) ctxs[i][0] = d; 1818 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1819 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1820 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1821 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1822 } 1823 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1824 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1825 ierr = PetscFree(coords);CHKERRQ(ierr); 1826 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1827 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1828 } 1829 } 1830 PetscFunctionReturn(0); 1831 } 1832 1833 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1834 { 1835 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1836 PetscErrorCode ierr; 1837 IS nis; 1838 const PetscInt *idxs; 1839 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1840 PetscBool *ld; 1841 1842 PetscFunctionBegin; 1843 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1844 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1845 if (mop == MPI_LAND) { 1846 /* init rootdata with true */ 1847 ld = (PetscBool*) matis->sf_rootdata; 1848 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1849 } else { 1850 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1851 } 1852 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1853 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1854 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1855 ld = (PetscBool*) matis->sf_leafdata; 1856 for (i=0;i<nd;i++) 1857 if (-1 < idxs[i] && idxs[i] < n) 1858 ld[idxs[i]] = PETSC_TRUE; 1859 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1860 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1861 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1862 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1863 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1864 if (mop == MPI_LAND) { 1865 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1866 } else { 1867 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1868 } 1869 for (i=0,nnd=0;i<n;i++) 1870 if (ld[i]) 1871 nidxs[nnd++] = i; 1872 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1873 ierr = ISDestroy(is);CHKERRQ(ierr); 1874 *is = nis; 1875 PetscFunctionReturn(0); 1876 } 1877 1878 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1879 { 1880 PC_IS *pcis = (PC_IS*)(pc->data); 1881 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1882 PetscErrorCode ierr; 1883 1884 PetscFunctionBegin; 1885 if (!pcbddc->benign_have_null) { 1886 PetscFunctionReturn(0); 1887 } 1888 if (pcbddc->ChangeOfBasisMatrix) { 1889 Vec swap; 1890 1891 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1892 swap = pcbddc->work_change; 1893 pcbddc->work_change = r; 1894 r = swap; 1895 } 1896 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1897 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1898 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1899 ierr = VecSet(z,0.);CHKERRQ(ierr); 1900 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1901 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1902 if (pcbddc->ChangeOfBasisMatrix) { 1903 pcbddc->work_change = r; 1904 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1905 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1906 } 1907 PetscFunctionReturn(0); 1908 } 1909 1910 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1911 { 1912 PCBDDCBenignMatMult_ctx ctx; 1913 PetscErrorCode ierr; 1914 PetscBool apply_right,apply_left,reset_x; 1915 1916 PetscFunctionBegin; 1917 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1918 if (transpose) { 1919 apply_right = ctx->apply_left; 1920 apply_left = ctx->apply_right; 1921 } else { 1922 apply_right = ctx->apply_right; 1923 apply_left = ctx->apply_left; 1924 } 1925 reset_x = PETSC_FALSE; 1926 if (apply_right) { 1927 const PetscScalar *ax; 1928 PetscInt nl,i; 1929 1930 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1931 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1932 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1933 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1934 for (i=0;i<ctx->benign_n;i++) { 1935 PetscScalar sum,val; 1936 const PetscInt *idxs; 1937 PetscInt nz,j; 1938 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1939 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1940 sum = 0.; 1941 if (ctx->apply_p0) { 1942 val = ctx->work[idxs[nz-1]]; 1943 for (j=0;j<nz-1;j++) { 1944 sum += ctx->work[idxs[j]]; 1945 ctx->work[idxs[j]] += val; 1946 } 1947 } else { 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 } 1951 } 1952 ctx->work[idxs[nz-1]] -= sum; 1953 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1954 } 1955 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1956 reset_x = PETSC_TRUE; 1957 } 1958 if (transpose) { 1959 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1960 } else { 1961 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1962 } 1963 if (reset_x) { 1964 ierr = VecResetArray(x);CHKERRQ(ierr); 1965 } 1966 if (apply_left) { 1967 PetscScalar *ay; 1968 PetscInt i; 1969 1970 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1971 for (i=0;i<ctx->benign_n;i++) { 1972 PetscScalar sum,val; 1973 const PetscInt *idxs; 1974 PetscInt nz,j; 1975 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1976 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1977 val = -ay[idxs[nz-1]]; 1978 if (ctx->apply_p0) { 1979 sum = 0.; 1980 for (j=0;j<nz-1;j++) { 1981 sum += ay[idxs[j]]; 1982 ay[idxs[j]] += val; 1983 } 1984 ay[idxs[nz-1]] += sum; 1985 } else { 1986 for (j=0;j<nz-1;j++) { 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] = 0.; 1990 } 1991 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1992 } 1993 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1994 } 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1999 { 2000 PetscErrorCode ierr; 2001 2002 PetscFunctionBegin; 2003 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2004 PetscFunctionReturn(0); 2005 } 2006 2007 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2008 { 2009 PetscErrorCode ierr; 2010 2011 PetscFunctionBegin; 2012 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2013 PetscFunctionReturn(0); 2014 } 2015 2016 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2017 { 2018 PC_IS *pcis = (PC_IS*)pc->data; 2019 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2020 PCBDDCBenignMatMult_ctx ctx; 2021 PetscErrorCode ierr; 2022 2023 PetscFunctionBegin; 2024 if (!restore) { 2025 Mat A_IB,A_BI; 2026 PetscScalar *work; 2027 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2028 2029 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2030 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2031 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2032 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2033 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2034 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2035 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2036 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2037 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2038 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2039 ctx->apply_left = PETSC_TRUE; 2040 ctx->apply_right = PETSC_FALSE; 2041 ctx->apply_p0 = PETSC_FALSE; 2042 ctx->benign_n = pcbddc->benign_n; 2043 if (reuse) { 2044 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2045 ctx->free = PETSC_FALSE; 2046 } else { /* TODO: could be optimized for successive solves */ 2047 ISLocalToGlobalMapping N_to_D; 2048 PetscInt i; 2049 2050 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2051 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2052 for (i=0;i<pcbddc->benign_n;i++) { 2053 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2054 } 2055 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2056 ctx->free = PETSC_TRUE; 2057 } 2058 ctx->A = pcis->A_IB; 2059 ctx->work = work; 2060 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2061 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2062 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2063 pcis->A_IB = A_IB; 2064 2065 /* A_BI as A_IB^T */ 2066 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2067 pcbddc->benign_original_mat = pcis->A_BI; 2068 pcis->A_BI = A_BI; 2069 } else { 2070 if (!pcbddc->benign_original_mat) { 2071 PetscFunctionReturn(0); 2072 } 2073 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2074 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2075 pcis->A_IB = ctx->A; 2076 ctx->A = NULL; 2077 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2078 pcis->A_BI = pcbddc->benign_original_mat; 2079 pcbddc->benign_original_mat = NULL; 2080 if (ctx->free) { 2081 PetscInt i; 2082 for (i=0;i<ctx->benign_n;i++) { 2083 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2084 } 2085 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2086 } 2087 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2088 ierr = PetscFree(ctx);CHKERRQ(ierr); 2089 } 2090 PetscFunctionReturn(0); 2091 } 2092 2093 /* used just in bddc debug mode */ 2094 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2095 { 2096 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2097 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2098 Mat An; 2099 PetscErrorCode ierr; 2100 2101 PetscFunctionBegin; 2102 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2103 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2104 if (is1) { 2105 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2106 ierr = MatDestroy(&An);CHKERRQ(ierr); 2107 } else { 2108 *B = An; 2109 } 2110 PetscFunctionReturn(0); 2111 } 2112 2113 /* TODO: add reuse flag */ 2114 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2115 { 2116 Mat Bt; 2117 PetscScalar *a,*bdata; 2118 const PetscInt *ii,*ij; 2119 PetscInt m,n,i,nnz,*bii,*bij; 2120 PetscBool flg_row; 2121 PetscErrorCode ierr; 2122 2123 PetscFunctionBegin; 2124 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2125 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2126 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2127 nnz = n; 2128 for (i=0;i<ii[n];i++) { 2129 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2130 } 2131 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2132 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2133 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2134 nnz = 0; 2135 bii[0] = 0; 2136 for (i=0;i<n;i++) { 2137 PetscInt j; 2138 for (j=ii[i];j<ii[i+1];j++) { 2139 PetscScalar entry = a[j]; 2140 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2141 bij[nnz] = ij[j]; 2142 bdata[nnz] = entry; 2143 nnz++; 2144 } 2145 } 2146 bii[i+1] = nnz; 2147 } 2148 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2149 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2150 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2151 { 2152 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2153 b->free_a = PETSC_TRUE; 2154 b->free_ij = PETSC_TRUE; 2155 } 2156 if (*B == A) { 2157 ierr = MatDestroy(&A);CHKERRQ(ierr); 2158 } 2159 *B = Bt; 2160 PetscFunctionReturn(0); 2161 } 2162 2163 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2164 { 2165 Mat B = NULL; 2166 DM dm; 2167 IS is_dummy,*cc_n; 2168 ISLocalToGlobalMapping l2gmap_dummy; 2169 PCBDDCGraph graph; 2170 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2171 PetscInt i,n; 2172 PetscInt *xadj,*adjncy; 2173 PetscBool isplex = PETSC_FALSE; 2174 PetscErrorCode ierr; 2175 2176 PetscFunctionBegin; 2177 if (ncc) *ncc = 0; 2178 if (cc) *cc = NULL; 2179 if (primalv) *primalv = NULL; 2180 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2181 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2182 if (!dm) { 2183 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2184 } 2185 if (dm) { 2186 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2187 } 2188 if (isplex) { /* this code has been modified from plexpartition.c */ 2189 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2190 PetscInt *adj = NULL; 2191 IS cellNumbering; 2192 const PetscInt *cellNum; 2193 PetscBool useCone, useClosure; 2194 PetscSection section; 2195 PetscSegBuffer adjBuffer; 2196 PetscSF sfPoint; 2197 PetscErrorCode ierr; 2198 2199 PetscFunctionBegin; 2200 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2201 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2202 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2203 /* Build adjacency graph via a section/segbuffer */ 2204 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2205 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2206 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2207 /* Always use FVM adjacency to create partitioner graph */ 2208 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2209 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2210 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2211 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2212 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2213 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2214 for (n = 0, p = pStart; p < pEnd; p++) { 2215 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2216 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2217 adjSize = PETSC_DETERMINE; 2218 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2219 for (a = 0; a < adjSize; ++a) { 2220 const PetscInt point = adj[a]; 2221 if (pStart <= point && point < pEnd) { 2222 PetscInt *PETSC_RESTRICT pBuf; 2223 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2224 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2225 *pBuf = point; 2226 } 2227 } 2228 n++; 2229 } 2230 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2231 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2232 /* Derive CSR graph from section/segbuffer */ 2233 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2234 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2235 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2236 for (idx = 0, p = pStart; p < pEnd; p++) { 2237 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2238 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2239 } 2240 xadj[n] = size; 2241 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2242 /* Clean up */ 2243 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2244 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2245 ierr = PetscFree(adj);CHKERRQ(ierr); 2246 graph->xadj = xadj; 2247 graph->adjncy = adjncy; 2248 } else { 2249 Mat A; 2250 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2251 2252 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2253 if (!A->rmap->N || !A->cmap->N) { 2254 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2255 PetscFunctionReturn(0); 2256 } 2257 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2258 if (!isseqaij && filter) { 2259 PetscBool isseqdense; 2260 2261 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2262 if (!isseqdense) { 2263 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2264 } else { /* TODO: rectangular case and LDA */ 2265 PetscScalar *array; 2266 PetscReal chop=1.e-6; 2267 2268 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2269 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2270 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2271 for (i=0;i<n;i++) { 2272 PetscInt j; 2273 for (j=i+1;j<n;j++) { 2274 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2275 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2276 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2277 } 2278 } 2279 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2280 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2281 } 2282 } else { 2283 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2284 B = A; 2285 } 2286 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2287 2288 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2289 if (filter) { 2290 PetscScalar *data; 2291 PetscInt j,cum; 2292 2293 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2294 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2295 cum = 0; 2296 for (i=0;i<n;i++) { 2297 PetscInt t; 2298 2299 for (j=xadj[i];j<xadj[i+1];j++) { 2300 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2301 continue; 2302 } 2303 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2304 } 2305 t = xadj_filtered[i]; 2306 xadj_filtered[i] = cum; 2307 cum += t; 2308 } 2309 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2310 graph->xadj = xadj_filtered; 2311 graph->adjncy = adjncy_filtered; 2312 } else { 2313 graph->xadj = xadj; 2314 graph->adjncy = adjncy; 2315 } 2316 } 2317 /* compute local connected components using PCBDDCGraph */ 2318 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2319 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2320 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2321 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2322 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2323 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2324 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2325 2326 /* partial clean up */ 2327 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2328 if (B) { 2329 PetscBool flg_row; 2330 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2331 ierr = MatDestroy(&B);CHKERRQ(ierr); 2332 } 2333 if (isplex) { 2334 ierr = PetscFree(xadj);CHKERRQ(ierr); 2335 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2336 } 2337 2338 /* get back data */ 2339 if (isplex) { 2340 if (ncc) *ncc = graph->ncc; 2341 if (cc || primalv) { 2342 Mat A; 2343 PetscBT btv,btvt; 2344 PetscSection subSection; 2345 PetscInt *ids,cum,cump,*cids,*pids; 2346 2347 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2348 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2349 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2350 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2351 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2352 2353 cids[0] = 0; 2354 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2355 PetscInt j; 2356 2357 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2358 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2359 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2360 2361 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2362 for (k = 0; k < 2*size; k += 2) { 2363 PetscInt s, p = closure[k], off, dof, cdof; 2364 2365 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2366 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2367 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2368 for (s = 0; s < dof-cdof; s++) { 2369 if (PetscBTLookupSet(btvt,off+s)) continue; 2370 if (!PetscBTLookup(btv,off+s)) { 2371 ids[cum++] = off+s; 2372 } else { /* cross-vertex */ 2373 pids[cump++] = off+s; 2374 } 2375 } 2376 } 2377 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2378 } 2379 cids[i+1] = cum; 2380 /* mark dofs as already assigned */ 2381 for (j = cids[i]; j < cids[i+1]; j++) { 2382 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2383 } 2384 } 2385 if (cc) { 2386 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2387 for (i = 0; i < graph->ncc; i++) { 2388 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2389 } 2390 *cc = cc_n; 2391 } 2392 if (primalv) { 2393 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2394 } 2395 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2396 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2397 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2398 } 2399 } else { 2400 if (ncc) *ncc = graph->ncc; 2401 if (cc) { 2402 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2403 for (i=0;i<graph->ncc;i++) { 2404 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2405 } 2406 *cc = cc_n; 2407 } 2408 } 2409 /* clean up graph */ 2410 graph->xadj = 0; 2411 graph->adjncy = 0; 2412 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2413 PetscFunctionReturn(0); 2414 } 2415 2416 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2417 { 2418 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2419 PC_IS* pcis = (PC_IS*)(pc->data); 2420 IS dirIS = NULL; 2421 PetscInt i; 2422 PetscErrorCode ierr; 2423 2424 PetscFunctionBegin; 2425 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2426 if (zerodiag) { 2427 Mat A; 2428 Vec vec3_N; 2429 PetscScalar *vals; 2430 const PetscInt *idxs; 2431 PetscInt nz,*count; 2432 2433 /* p0 */ 2434 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2435 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2436 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2437 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2438 for (i=0;i<nz;i++) vals[i] = 1.; 2439 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2440 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2441 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2442 /* v_I */ 2443 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2444 for (i=0;i<nz;i++) vals[i] = 0.; 2445 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2446 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2447 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2449 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2450 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2451 if (dirIS) { 2452 PetscInt n; 2453 2454 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2455 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2456 for (i=0;i<n;i++) vals[i] = 0.; 2457 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2458 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2459 } 2460 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2461 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2462 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2463 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2464 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2465 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2466 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2467 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2468 ierr = PetscFree(vals);CHKERRQ(ierr); 2469 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2470 2471 /* there should not be any pressure dofs lying on the interface */ 2472 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2473 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2474 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2475 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2476 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2477 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2478 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2479 ierr = PetscFree(count);CHKERRQ(ierr); 2480 } 2481 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2482 2483 /* check PCBDDCBenignGetOrSetP0 */ 2484 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2485 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2486 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2487 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2488 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2489 for (i=0;i<pcbddc->benign_n;i++) { 2490 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2491 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2492 } 2493 PetscFunctionReturn(0); 2494 } 2495 2496 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2497 { 2498 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2499 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2500 PetscInt nz,n; 2501 PetscInt *interior_dofs,n_interior_dofs,nneu; 2502 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2503 PetscErrorCode ierr; 2504 2505 PetscFunctionBegin; 2506 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2507 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2508 for (n=0;n<pcbddc->benign_n;n++) { 2509 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2510 } 2511 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2512 pcbddc->benign_n = 0; 2513 2514 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2515 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2516 Checks if all the pressure dofs in each subdomain have a zero diagonal 2517 If not, a change of basis on pressures is not needed 2518 since the local Schur complements are already SPD 2519 */ 2520 has_null_pressures = PETSC_TRUE; 2521 have_null = PETSC_TRUE; 2522 if (pcbddc->n_ISForDofsLocal) { 2523 IS iP = NULL; 2524 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2525 2526 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2527 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2528 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2529 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2530 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2531 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2532 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2533 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2534 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2535 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2536 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2537 if (iP) { 2538 IS newpressures; 2539 2540 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2541 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2542 pressures = newpressures; 2543 } 2544 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2545 if (!sorted) { 2546 ierr = ISSort(pressures);CHKERRQ(ierr); 2547 } 2548 } else { 2549 pressures = NULL; 2550 } 2551 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2552 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2553 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2554 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2555 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2556 if (!sorted) { 2557 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2558 } 2559 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2560 zerodiag_save = zerodiag; 2561 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2562 if (!nz) { 2563 if (n) have_null = PETSC_FALSE; 2564 has_null_pressures = PETSC_FALSE; 2565 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2566 } 2567 recompute_zerodiag = PETSC_FALSE; 2568 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2569 zerodiag_subs = NULL; 2570 pcbddc->benign_n = 0; 2571 n_interior_dofs = 0; 2572 interior_dofs = NULL; 2573 nneu = 0; 2574 if (pcbddc->NeumannBoundariesLocal) { 2575 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2576 } 2577 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2578 if (checkb) { /* need to compute interior nodes */ 2579 PetscInt n,i,j; 2580 PetscInt n_neigh,*neigh,*n_shared,**shared; 2581 PetscInt *iwork; 2582 2583 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2584 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2585 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2586 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2587 for (i=1;i<n_neigh;i++) 2588 for (j=0;j<n_shared[i];j++) 2589 iwork[shared[i][j]] += 1; 2590 for (i=0;i<n;i++) 2591 if (!iwork[i]) 2592 interior_dofs[n_interior_dofs++] = i; 2593 ierr = PetscFree(iwork);CHKERRQ(ierr); 2594 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2595 } 2596 if (has_null_pressures) { 2597 IS *subs; 2598 PetscInt nsubs,i,j,nl; 2599 const PetscInt *idxs; 2600 PetscScalar *array; 2601 Vec *work; 2602 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2603 2604 subs = pcbddc->local_subs; 2605 nsubs = pcbddc->n_local_subs; 2606 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2607 if (checkb) { 2608 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2609 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2610 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2611 /* work[0] = 1_p */ 2612 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2613 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2614 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2615 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2616 /* work[0] = 1_v */ 2617 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2620 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2621 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2622 } 2623 if (nsubs > 1) { 2624 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2625 for (i=0;i<nsubs;i++) { 2626 ISLocalToGlobalMapping l2g; 2627 IS t_zerodiag_subs; 2628 PetscInt nl; 2629 2630 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2631 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2632 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2633 if (nl) { 2634 PetscBool valid = PETSC_TRUE; 2635 2636 if (checkb) { 2637 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2638 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2639 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2640 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2641 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2642 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2643 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2644 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2645 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2646 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2647 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2648 for (j=0;j<n_interior_dofs;j++) { 2649 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2650 valid = PETSC_FALSE; 2651 break; 2652 } 2653 } 2654 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2655 } 2656 if (valid && nneu) { 2657 const PetscInt *idxs; 2658 PetscInt nzb; 2659 2660 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2661 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2662 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2663 if (nzb) valid = PETSC_FALSE; 2664 } 2665 if (valid && pressures) { 2666 IS t_pressure_subs; 2667 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2668 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2669 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2670 } 2671 if (valid) { 2672 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2673 pcbddc->benign_n++; 2674 } else { 2675 recompute_zerodiag = PETSC_TRUE; 2676 } 2677 } 2678 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2679 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2680 } 2681 } else { /* there's just one subdomain (or zero if they have not been detected */ 2682 PetscBool valid = PETSC_TRUE; 2683 2684 if (nneu) valid = PETSC_FALSE; 2685 if (valid && pressures) { 2686 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2687 } 2688 if (valid && checkb) { 2689 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2690 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2691 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2692 for (j=0;j<n_interior_dofs;j++) { 2693 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2694 valid = PETSC_FALSE; 2695 break; 2696 } 2697 } 2698 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2699 } 2700 if (valid) { 2701 pcbddc->benign_n = 1; 2702 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2703 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2704 zerodiag_subs[0] = zerodiag; 2705 } 2706 } 2707 if (checkb) { 2708 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2709 } 2710 } 2711 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2712 2713 if (!pcbddc->benign_n) { 2714 PetscInt n; 2715 2716 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2717 recompute_zerodiag = PETSC_FALSE; 2718 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2719 if (n) { 2720 has_null_pressures = PETSC_FALSE; 2721 have_null = PETSC_FALSE; 2722 } 2723 } 2724 2725 /* final check for null pressures */ 2726 if (zerodiag && pressures) { 2727 PetscInt nz,np; 2728 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2729 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2730 if (nz != np) have_null = PETSC_FALSE; 2731 } 2732 2733 if (recompute_zerodiag) { 2734 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2735 if (pcbddc->benign_n == 1) { 2736 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2737 zerodiag = zerodiag_subs[0]; 2738 } else { 2739 PetscInt i,nzn,*new_idxs; 2740 2741 nzn = 0; 2742 for (i=0;i<pcbddc->benign_n;i++) { 2743 PetscInt ns; 2744 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2745 nzn += ns; 2746 } 2747 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2748 nzn = 0; 2749 for (i=0;i<pcbddc->benign_n;i++) { 2750 PetscInt ns,*idxs; 2751 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2752 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2753 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2754 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2755 nzn += ns; 2756 } 2757 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2758 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2759 } 2760 have_null = PETSC_FALSE; 2761 } 2762 2763 /* Prepare matrix to compute no-net-flux */ 2764 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2765 Mat A,loc_divudotp; 2766 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2767 IS row,col,isused = NULL; 2768 PetscInt M,N,n,st,n_isused; 2769 2770 if (pressures) { 2771 isused = pressures; 2772 } else { 2773 isused = zerodiag_save; 2774 } 2775 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2776 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2777 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2778 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2779 n_isused = 0; 2780 if (isused) { 2781 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2782 } 2783 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2784 st = st-n_isused; 2785 if (n) { 2786 const PetscInt *gidxs; 2787 2788 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2789 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2790 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2791 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2792 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2793 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2794 } else { 2795 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 } 2799 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2800 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2801 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2802 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2803 ierr = ISDestroy(&row);CHKERRQ(ierr); 2804 ierr = ISDestroy(&col);CHKERRQ(ierr); 2805 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2806 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2807 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2808 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2809 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2810 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2811 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2812 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2813 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2814 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2815 } 2816 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2817 2818 /* change of basis and p0 dofs */ 2819 if (has_null_pressures) { 2820 IS zerodiagc; 2821 const PetscInt *idxs,*idxsc; 2822 PetscInt i,s,*nnz; 2823 2824 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2825 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2826 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2827 /* local change of basis for pressures */ 2828 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2829 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2830 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2831 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2832 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2833 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2834 for (i=0;i<pcbddc->benign_n;i++) { 2835 PetscInt nzs,j; 2836 2837 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2838 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2839 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2840 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2841 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2842 } 2843 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2844 ierr = PetscFree(nnz);CHKERRQ(ierr); 2845 /* set identity on velocities */ 2846 for (i=0;i<n-nz;i++) { 2847 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2848 } 2849 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2850 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2851 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2852 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2853 /* set change on pressures */ 2854 for (s=0;s<pcbddc->benign_n;s++) { 2855 PetscScalar *array; 2856 PetscInt nzs; 2857 2858 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2859 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2860 for (i=0;i<nzs-1;i++) { 2861 PetscScalar vals[2]; 2862 PetscInt cols[2]; 2863 2864 cols[0] = idxs[i]; 2865 cols[1] = idxs[nzs-1]; 2866 vals[0] = 1.; 2867 vals[1] = 1.; 2868 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2869 } 2870 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2871 for (i=0;i<nzs-1;i++) array[i] = -1.; 2872 array[nzs-1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2874 /* store local idxs for p0 */ 2875 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2876 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2877 ierr = PetscFree(array);CHKERRQ(ierr); 2878 } 2879 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2880 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2881 /* project if needed */ 2882 if (pcbddc->benign_change_explicit) { 2883 Mat M; 2884 2885 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2886 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2887 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2888 ierr = MatDestroy(&M);CHKERRQ(ierr); 2889 } 2890 /* store global idxs for p0 */ 2891 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2892 } 2893 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2894 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2895 2896 /* determines if the coarse solver will be singular or not */ 2897 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2898 /* determines if the problem has subdomains with 0 pressure block */ 2899 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2900 *zerodiaglocal = zerodiag; 2901 PetscFunctionReturn(0); 2902 } 2903 2904 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2905 { 2906 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2907 PetscScalar *array; 2908 PetscErrorCode ierr; 2909 2910 PetscFunctionBegin; 2911 if (!pcbddc->benign_sf) { 2912 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2913 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2914 } 2915 if (get) { 2916 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2917 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2918 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2919 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2920 } else { 2921 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2922 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2923 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2924 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2925 } 2926 PetscFunctionReturn(0); 2927 } 2928 2929 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2930 { 2931 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2932 PetscErrorCode ierr; 2933 2934 PetscFunctionBegin; 2935 /* TODO: add error checking 2936 - avoid nested pop (or push) calls. 2937 - cannot push before pop. 2938 - cannot call this if pcbddc->local_mat is NULL 2939 */ 2940 if (!pcbddc->benign_n) { 2941 PetscFunctionReturn(0); 2942 } 2943 if (pop) { 2944 if (pcbddc->benign_change_explicit) { 2945 IS is_p0; 2946 MatReuse reuse; 2947 2948 /* extract B_0 */ 2949 reuse = MAT_INITIAL_MATRIX; 2950 if (pcbddc->benign_B0) { 2951 reuse = MAT_REUSE_MATRIX; 2952 } 2953 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2954 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2955 /* remove rows and cols from local problem */ 2956 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2957 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2958 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2959 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2960 } else { 2961 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2962 PetscScalar *vals; 2963 PetscInt i,n,*idxs_ins; 2964 2965 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2966 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2967 if (!pcbddc->benign_B0) { 2968 PetscInt *nnz; 2969 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2970 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2971 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2972 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2973 for (i=0;i<pcbddc->benign_n;i++) { 2974 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2975 nnz[i] = n - nnz[i]; 2976 } 2977 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2978 ierr = PetscFree(nnz);CHKERRQ(ierr); 2979 } 2980 2981 for (i=0;i<pcbddc->benign_n;i++) { 2982 PetscScalar *array; 2983 PetscInt *idxs,j,nz,cum; 2984 2985 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2986 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2987 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2988 for (j=0;j<nz;j++) vals[j] = 1.; 2989 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2990 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2991 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2992 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2993 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2994 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2995 cum = 0; 2996 for (j=0;j<n;j++) { 2997 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2998 vals[cum] = array[j]; 2999 idxs_ins[cum] = j; 3000 cum++; 3001 } 3002 } 3003 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3004 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3005 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3006 } 3007 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3008 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3009 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3010 } 3011 } else { /* push */ 3012 if (pcbddc->benign_change_explicit) { 3013 PetscInt i; 3014 3015 for (i=0;i<pcbddc->benign_n;i++) { 3016 PetscScalar *B0_vals; 3017 PetscInt *B0_cols,B0_ncol; 3018 3019 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3020 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3021 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3022 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3023 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3024 } 3025 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3026 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3027 } else { 3028 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3029 } 3030 } 3031 PetscFunctionReturn(0); 3032 } 3033 3034 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3035 { 3036 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3037 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3038 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3039 PetscBLASInt *B_iwork,*B_ifail; 3040 PetscScalar *work,lwork; 3041 PetscScalar *St,*S,*eigv; 3042 PetscScalar *Sarray,*Starray; 3043 PetscReal *eigs,thresh,lthresh,uthresh; 3044 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3045 PetscBool allocated_S_St; 3046 #if defined(PETSC_USE_COMPLEX) 3047 PetscReal *rwork; 3048 #endif 3049 PetscErrorCode ierr; 3050 3051 PetscFunctionBegin; 3052 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3053 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3054 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3055 3056 if (pcbddc->dbg_flag) { 3057 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3058 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3059 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3060 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3061 } 3062 3063 if (pcbddc->dbg_flag) { 3064 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3065 } 3066 3067 /* max size of subsets */ 3068 mss = 0; 3069 for (i=0;i<sub_schurs->n_subs;i++) { 3070 PetscInt subset_size; 3071 3072 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3073 mss = PetscMax(mss,subset_size); 3074 } 3075 3076 /* min/max and threshold */ 3077 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3078 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3079 nmax = PetscMax(nmin,nmax); 3080 allocated_S_St = PETSC_FALSE; 3081 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3082 allocated_S_St = PETSC_TRUE; 3083 } 3084 3085 /* allocate lapack workspace */ 3086 cum = cum2 = 0; 3087 maxneigs = 0; 3088 for (i=0;i<sub_schurs->n_subs;i++) { 3089 PetscInt n,subset_size; 3090 3091 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3092 n = PetscMin(subset_size,nmax); 3093 cum += subset_size; 3094 cum2 += subset_size*n; 3095 maxneigs = PetscMax(maxneigs,n); 3096 } 3097 if (mss) { 3098 if (sub_schurs->is_symmetric) { 3099 PetscBLASInt B_itype = 1; 3100 PetscBLASInt B_N = mss; 3101 PetscReal zero = 0.0; 3102 PetscReal eps = 0.0; /* dlamch? */ 3103 3104 B_lwork = -1; 3105 S = NULL; 3106 St = NULL; 3107 eigs = NULL; 3108 eigv = NULL; 3109 B_iwork = NULL; 3110 B_ifail = NULL; 3111 #if defined(PETSC_USE_COMPLEX) 3112 rwork = NULL; 3113 #endif 3114 thresh = 1.0; 3115 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3116 #if defined(PETSC_USE_COMPLEX) 3117 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3118 #else 3119 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3120 #endif 3121 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3123 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3124 } else { 3125 lwork = 0; 3126 } 3127 3128 nv = 0; 3129 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3130 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3131 } 3132 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3133 if (allocated_S_St) { 3134 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3135 } 3136 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3137 #if defined(PETSC_USE_COMPLEX) 3138 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3139 #endif 3140 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3141 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3142 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3143 nv+cum,&pcbddc->adaptive_constraints_idxs, 3144 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3145 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3146 3147 maxneigs = 0; 3148 cum = cumarray = 0; 3149 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3150 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3151 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3152 const PetscInt *idxs; 3153 3154 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3155 for (cum=0;cum<nv;cum++) { 3156 pcbddc->adaptive_constraints_n[cum] = 1; 3157 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3158 pcbddc->adaptive_constraints_data[cum] = 1.0; 3159 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3160 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3161 } 3162 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3163 } 3164 3165 if (mss) { /* multilevel */ 3166 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3167 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3168 } 3169 3170 lthresh = pcbddc->adaptive_threshold[0]; 3171 uthresh = pcbddc->adaptive_threshold[1]; 3172 for (i=0;i<sub_schurs->n_subs;i++) { 3173 const PetscInt *idxs; 3174 PetscReal upper,lower; 3175 PetscInt j,subset_size,eigs_start = 0; 3176 PetscBLASInt B_N; 3177 PetscBool same_data = PETSC_FALSE; 3178 PetscBool scal = PETSC_FALSE; 3179 3180 if (pcbddc->use_deluxe_scaling) { 3181 upper = PETSC_MAX_REAL; 3182 lower = uthresh; 3183 } else { 3184 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3185 upper = 1./uthresh; 3186 lower = 0.; 3187 } 3188 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3189 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3190 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3191 /* this is experimental: we assume the dofs have been properly grouped to have 3192 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3193 if (!sub_schurs->is_posdef) { 3194 Mat T; 3195 3196 for (j=0;j<subset_size;j++) { 3197 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3198 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3199 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3200 ierr = MatDestroy(&T);CHKERRQ(ierr); 3201 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3202 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3203 ierr = MatDestroy(&T);CHKERRQ(ierr); 3204 if (sub_schurs->change_primal_sub) { 3205 PetscInt nz,k; 3206 const PetscInt *idxs; 3207 3208 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3209 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3210 for (k=0;k<nz;k++) { 3211 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3212 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3213 } 3214 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3215 } 3216 scal = PETSC_TRUE; 3217 break; 3218 } 3219 } 3220 } 3221 3222 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3223 if (sub_schurs->is_symmetric) { 3224 PetscInt j,k; 3225 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3226 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3227 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3228 } 3229 for (j=0;j<subset_size;j++) { 3230 for (k=j;k<subset_size;k++) { 3231 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3232 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3233 } 3234 } 3235 } else { 3236 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3237 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3238 } 3239 } else { 3240 S = Sarray + cumarray; 3241 St = Starray + cumarray; 3242 } 3243 /* see if we can save some work */ 3244 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3245 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3246 } 3247 3248 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3249 B_neigs = 0; 3250 } else { 3251 if (sub_schurs->is_symmetric) { 3252 PetscBLASInt B_itype = 1; 3253 PetscBLASInt B_IL, B_IU; 3254 PetscReal eps = -1.0; /* dlamch? */ 3255 PetscInt nmin_s; 3256 PetscBool compute_range; 3257 3258 compute_range = (PetscBool)!same_data; 3259 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3260 3261 if (pcbddc->dbg_flag) { 3262 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range);CHKERRQ(ierr); 3263 } 3264 3265 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3266 if (compute_range) { 3267 3268 /* ask for eigenvalues larger than thresh */ 3269 if (sub_schurs->is_posdef) { 3270 #if defined(PETSC_USE_COMPLEX) 3271 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3272 #else 3273 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3274 #endif 3275 } else { /* no theory so far, but it works nicely */ 3276 PetscInt recipe = 0; 3277 PetscReal bb[2]; 3278 3279 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3280 switch (recipe) { 3281 case 0: 3282 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3283 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3284 #if defined(PETSC_USE_COMPLEX) 3285 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3286 #else 3287 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3288 #endif 3289 break; 3290 case 1: 3291 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3292 #if defined(PETSC_USE_COMPLEX) 3293 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3294 #else 3295 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3296 #endif 3297 if (!scal) { 3298 PetscBLASInt B_neigs2; 3299 3300 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3301 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3302 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3303 #if defined(PETSC_USE_COMPLEX) 3304 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3305 #else 3306 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3307 #endif 3308 B_neigs += B_neigs2; 3309 } 3310 break; 3311 default: 3312 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3313 break; 3314 } 3315 } 3316 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3317 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3318 B_IL = 1; 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3321 #else 3322 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3323 #endif 3324 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3325 PetscInt k; 3326 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3327 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3328 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3329 nmin = nmax; 3330 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3331 for (k=0;k<nmax;k++) { 3332 eigs[k] = 1./PETSC_SMALL; 3333 eigv[k*(subset_size+1)] = 1.0; 3334 } 3335 } 3336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3337 if (B_ierr) { 3338 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3339 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3340 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3341 } 3342 3343 if (B_neigs > nmax) { 3344 if (pcbddc->dbg_flag) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3346 } 3347 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3348 B_neigs = nmax; 3349 } 3350 3351 nmin_s = PetscMin(nmin,B_N); 3352 if (B_neigs < nmin_s) { 3353 PetscBLASInt B_neigs2; 3354 3355 if (pcbddc->use_deluxe_scaling) { 3356 if (scal) { 3357 B_IU = nmin_s; 3358 B_IL = B_neigs + 1; 3359 } else { 3360 B_IL = B_N - nmin_s + 1; 3361 B_IU = B_N - B_neigs; 3362 } 3363 } else { 3364 B_IL = B_neigs + 1; 3365 B_IU = nmin_s; 3366 } 3367 if (pcbddc->dbg_flag) { 3368 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3369 } 3370 if (sub_schurs->is_symmetric) { 3371 PetscInt j,k; 3372 for (j=0;j<subset_size;j++) { 3373 for (k=j;k<subset_size;k++) { 3374 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3375 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3376 } 3377 } 3378 } else { 3379 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3380 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3381 } 3382 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3389 B_neigs += B_neigs2; 3390 } 3391 if (B_ierr) { 3392 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3393 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3394 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3395 } 3396 if (pcbddc->dbg_flag) { 3397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3398 for (j=0;j<B_neigs;j++) { 3399 if (eigs[j] == 0.0) { 3400 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3401 } else { 3402 if (pcbddc->use_deluxe_scaling) { 3403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3404 } else { 3405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3406 } 3407 } 3408 } 3409 } 3410 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3411 } 3412 /* change the basis back to the original one */ 3413 if (sub_schurs->change) { 3414 Mat change,phi,phit; 3415 3416 if (pcbddc->dbg_flag > 2) { 3417 PetscInt ii; 3418 for (ii=0;ii<B_neigs;ii++) { 3419 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3420 for (j=0;j<B_N;j++) { 3421 #if defined(PETSC_USE_COMPLEX) 3422 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3423 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3425 #else 3426 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3427 #endif 3428 } 3429 } 3430 } 3431 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3432 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3433 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3434 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3435 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3436 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3437 } 3438 maxneigs = PetscMax(B_neigs,maxneigs); 3439 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3440 if (B_neigs) { 3441 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3442 3443 if (pcbddc->dbg_flag > 1) { 3444 PetscInt ii; 3445 for (ii=0;ii<B_neigs;ii++) { 3446 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3447 for (j=0;j<B_N;j++) { 3448 #if defined(PETSC_USE_COMPLEX) 3449 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3450 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3451 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3452 #else 3453 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3454 #endif 3455 } 3456 } 3457 } 3458 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3459 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3460 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3461 cum++; 3462 } 3463 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3464 /* shift for next computation */ 3465 cumarray += subset_size*subset_size; 3466 } 3467 if (pcbddc->dbg_flag) { 3468 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3469 } 3470 3471 if (mss) { 3472 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3473 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3474 /* destroy matrices (junk) */ 3475 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3476 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3477 } 3478 if (allocated_S_St) { 3479 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3480 } 3481 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3482 #if defined(PETSC_USE_COMPLEX) 3483 ierr = PetscFree(rwork);CHKERRQ(ierr); 3484 #endif 3485 if (pcbddc->dbg_flag) { 3486 PetscInt maxneigs_r; 3487 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3489 } 3490 PetscFunctionReturn(0); 3491 } 3492 3493 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3494 { 3495 PetscScalar *coarse_submat_vals; 3496 PetscErrorCode ierr; 3497 3498 PetscFunctionBegin; 3499 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3500 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3501 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3502 3503 /* Setup local neumann solver ksp_R */ 3504 /* PCBDDCSetUpLocalScatters should be called first! */ 3505 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3506 3507 /* 3508 Setup local correction and local part of coarse basis. 3509 Gives back the dense local part of the coarse matrix in column major ordering 3510 */ 3511 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3512 3513 /* Compute total number of coarse nodes and setup coarse solver */ 3514 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3515 3516 /* free */ 3517 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3518 PetscFunctionReturn(0); 3519 } 3520 3521 PetscErrorCode PCBDDCResetCustomization(PC pc) 3522 { 3523 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3524 PetscErrorCode ierr; 3525 3526 PetscFunctionBegin; 3527 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3528 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3529 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3530 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3531 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3532 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3533 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3534 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3535 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3536 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3537 PetscFunctionReturn(0); 3538 } 3539 3540 PetscErrorCode PCBDDCResetTopography(PC pc) 3541 { 3542 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3543 PetscInt i; 3544 PetscErrorCode ierr; 3545 3546 PetscFunctionBegin; 3547 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3548 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3549 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3550 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3551 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3552 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3553 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3554 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3555 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3556 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3557 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3558 for (i=0;i<pcbddc->n_local_subs;i++) { 3559 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3560 } 3561 pcbddc->n_local_subs = 0; 3562 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3563 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3564 pcbddc->graphanalyzed = PETSC_FALSE; 3565 pcbddc->recompute_topography = PETSC_TRUE; 3566 pcbddc->corner_selected = PETSC_FALSE; 3567 PetscFunctionReturn(0); 3568 } 3569 3570 PetscErrorCode PCBDDCResetSolvers(PC pc) 3571 { 3572 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3573 PetscErrorCode ierr; 3574 3575 PetscFunctionBegin; 3576 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3577 if (pcbddc->coarse_phi_B) { 3578 PetscScalar *array; 3579 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3580 ierr = PetscFree(array);CHKERRQ(ierr); 3581 } 3582 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3583 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3584 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3585 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3586 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3587 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3588 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3589 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3590 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3591 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3592 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3593 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3594 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3595 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3596 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3597 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3598 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3599 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3600 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3601 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3602 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3603 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3604 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3605 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3606 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3607 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3608 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3609 if (pcbddc->benign_zerodiag_subs) { 3610 PetscInt i; 3611 for (i=0;i<pcbddc->benign_n;i++) { 3612 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3613 } 3614 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3615 } 3616 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3617 PetscFunctionReturn(0); 3618 } 3619 3620 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3621 { 3622 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3623 PC_IS *pcis = (PC_IS*)pc->data; 3624 VecType impVecType; 3625 PetscInt n_constraints,n_R,old_size; 3626 PetscErrorCode ierr; 3627 3628 PetscFunctionBegin; 3629 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3630 n_R = pcis->n - pcbddc->n_vertices; 3631 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3632 /* local work vectors (try to avoid unneeded work)*/ 3633 /* R nodes */ 3634 old_size = -1; 3635 if (pcbddc->vec1_R) { 3636 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3637 } 3638 if (n_R != old_size) { 3639 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3640 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3641 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3642 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3643 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3644 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3645 } 3646 /* local primal dofs */ 3647 old_size = -1; 3648 if (pcbddc->vec1_P) { 3649 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3650 } 3651 if (pcbddc->local_primal_size != old_size) { 3652 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3653 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3654 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3655 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3656 } 3657 /* local explicit constraints */ 3658 old_size = -1; 3659 if (pcbddc->vec1_C) { 3660 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3661 } 3662 if (n_constraints && n_constraints != old_size) { 3663 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3664 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3665 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3666 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3667 } 3668 PetscFunctionReturn(0); 3669 } 3670 3671 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3672 { 3673 PetscErrorCode ierr; 3674 /* pointers to pcis and pcbddc */ 3675 PC_IS* pcis = (PC_IS*)pc->data; 3676 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3677 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3678 /* submatrices of local problem */ 3679 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3680 /* submatrices of local coarse problem */ 3681 Mat S_VV,S_CV,S_VC,S_CC; 3682 /* working matrices */ 3683 Mat C_CR; 3684 /* additional working stuff */ 3685 PC pc_R; 3686 Mat F,Brhs = NULL; 3687 Vec dummy_vec; 3688 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3689 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3690 PetscScalar *work; 3691 PetscInt *idx_V_B; 3692 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3693 PetscInt i,n_R,n_D,n_B; 3694 3695 /* some shortcuts to scalars */ 3696 PetscScalar one=1.0,m_one=-1.0; 3697 3698 PetscFunctionBegin; 3699 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3700 3701 /* Set Non-overlapping dimensions */ 3702 n_vertices = pcbddc->n_vertices; 3703 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3704 n_B = pcis->n_B; 3705 n_D = pcis->n - n_B; 3706 n_R = pcis->n - n_vertices; 3707 3708 /* vertices in boundary numbering */ 3709 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3710 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3711 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3712 3713 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3714 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3716 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3717 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3718 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3719 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3720 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3721 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3722 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3723 3724 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3725 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3726 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3727 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3728 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3729 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3730 lda_rhs = n_R; 3731 need_benign_correction = PETSC_FALSE; 3732 if (isLU || isILU || isCHOL) { 3733 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3734 } else if (sub_schurs && sub_schurs->reuse_solver) { 3735 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3736 MatFactorType type; 3737 3738 F = reuse_solver->F; 3739 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3740 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3741 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3742 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3743 } else { 3744 F = NULL; 3745 } 3746 3747 /* determine if we can use a sparse right-hand side */ 3748 sparserhs = PETSC_FALSE; 3749 if (F) { 3750 MatSolverType solver; 3751 3752 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3753 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3754 } 3755 3756 /* allocate workspace */ 3757 n = 0; 3758 if (n_constraints) { 3759 n += lda_rhs*n_constraints; 3760 } 3761 if (n_vertices) { 3762 n = PetscMax(2*lda_rhs*n_vertices,n); 3763 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3764 } 3765 if (!pcbddc->symmetric_primal) { 3766 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3767 } 3768 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3769 3770 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3771 dummy_vec = NULL; 3772 if (need_benign_correction && lda_rhs != n_R && F) { 3773 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3774 } 3775 3776 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3777 if (n_constraints) { 3778 Mat M3,C_B; 3779 IS is_aux; 3780 PetscScalar *array,*array2; 3781 3782 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3783 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3784 3785 /* Extract constraints on R nodes: C_{CR} */ 3786 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3787 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3788 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3789 3790 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3791 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3792 if (!sparserhs) { 3793 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3794 for (i=0;i<n_constraints;i++) { 3795 const PetscScalar *row_cmat_values; 3796 const PetscInt *row_cmat_indices; 3797 PetscInt size_of_constraint,j; 3798 3799 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3800 for (j=0;j<size_of_constraint;j++) { 3801 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3802 } 3803 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3804 } 3805 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3806 } else { 3807 Mat tC_CR; 3808 3809 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3810 if (lda_rhs != n_R) { 3811 PetscScalar *aa; 3812 PetscInt r,*ii,*jj; 3813 PetscBool done; 3814 3815 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3816 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3817 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3818 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3819 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3820 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3821 } else { 3822 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3823 tC_CR = C_CR; 3824 } 3825 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3826 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3827 } 3828 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3829 if (F) { 3830 if (need_benign_correction) { 3831 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3832 3833 /* rhs is already zero on interior dofs, no need to change the rhs */ 3834 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3835 } 3836 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3837 if (need_benign_correction) { 3838 PetscScalar *marr; 3839 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3840 3841 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3842 if (lda_rhs != n_R) { 3843 for (i=0;i<n_constraints;i++) { 3844 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3845 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3846 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3847 } 3848 } else { 3849 for (i=0;i<n_constraints;i++) { 3850 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3851 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3852 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3853 } 3854 } 3855 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3856 } 3857 } else { 3858 PetscScalar *marr; 3859 3860 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3861 for (i=0;i<n_constraints;i++) { 3862 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3863 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3864 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3865 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3866 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3867 } 3868 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3869 } 3870 if (sparserhs) { 3871 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3872 } 3873 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3874 if (!pcbddc->switch_static) { 3875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3876 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3877 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3878 for (i=0;i<n_constraints;i++) { 3879 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3880 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3881 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3882 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3883 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3884 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3885 } 3886 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3887 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3888 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3889 } else { 3890 if (lda_rhs != n_R) { 3891 IS dummy; 3892 3893 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3894 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3895 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3896 } else { 3897 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3898 pcbddc->local_auxmat2 = local_auxmat2_R; 3899 } 3900 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3901 } 3902 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3903 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3904 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3905 if (isCHOL) { 3906 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3907 } else { 3908 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3909 } 3910 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3911 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3912 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3913 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3914 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3915 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3916 } 3917 3918 /* Get submatrices from subdomain matrix */ 3919 if (n_vertices) { 3920 IS is_aux; 3921 PetscBool isseqaij; 3922 3923 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3924 IS tis; 3925 3926 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3927 ierr = ISSort(tis);CHKERRQ(ierr); 3928 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3929 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3930 } else { 3931 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3932 } 3933 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3934 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3935 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3936 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3937 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3938 } 3939 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3940 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3941 } 3942 3943 /* Matrix of coarse basis functions (local) */ 3944 if (pcbddc->coarse_phi_B) { 3945 PetscInt on_B,on_primal,on_D=n_D; 3946 if (pcbddc->coarse_phi_D) { 3947 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3948 } 3949 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3950 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3951 PetscScalar *marray; 3952 3953 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3954 ierr = PetscFree(marray);CHKERRQ(ierr); 3955 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3956 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3957 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3958 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3959 } 3960 } 3961 3962 if (!pcbddc->coarse_phi_B) { 3963 PetscScalar *marr; 3964 3965 /* memory size */ 3966 n = n_B*pcbddc->local_primal_size; 3967 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3968 if (!pcbddc->symmetric_primal) n *= 2; 3969 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3970 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3971 marr += n_B*pcbddc->local_primal_size; 3972 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3973 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3974 marr += n_D*pcbddc->local_primal_size; 3975 } 3976 if (!pcbddc->symmetric_primal) { 3977 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3978 marr += n_B*pcbddc->local_primal_size; 3979 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3980 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3981 } 3982 } else { 3983 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3984 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3985 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3986 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3987 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3988 } 3989 } 3990 } 3991 3992 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3993 p0_lidx_I = NULL; 3994 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3995 const PetscInt *idxs; 3996 3997 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3998 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3999 for (i=0;i<pcbddc->benign_n;i++) { 4000 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4001 } 4002 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4003 } 4004 4005 /* vertices */ 4006 if (n_vertices) { 4007 PetscBool restoreavr = PETSC_FALSE; 4008 4009 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4010 4011 if (n_R) { 4012 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4013 PetscBLASInt B_N,B_one = 1; 4014 PetscScalar *x,*y; 4015 4016 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4017 if (need_benign_correction) { 4018 ISLocalToGlobalMapping RtoN; 4019 IS is_p0; 4020 PetscInt *idxs_p0,n; 4021 4022 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4023 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4024 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4025 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4026 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4027 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4028 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4029 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4030 } 4031 4032 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4033 if (!sparserhs || need_benign_correction) { 4034 if (lda_rhs == n_R) { 4035 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4036 } else { 4037 PetscScalar *av,*array; 4038 const PetscInt *xadj,*adjncy; 4039 PetscInt n; 4040 PetscBool flg_row; 4041 4042 array = work+lda_rhs*n_vertices; 4043 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4044 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4045 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4046 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4047 for (i=0;i<n;i++) { 4048 PetscInt j; 4049 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4050 } 4051 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4052 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4053 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4054 } 4055 if (need_benign_correction) { 4056 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4057 PetscScalar *marr; 4058 4059 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4060 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4061 4062 | 0 0 0 | (V) 4063 L = | 0 0 -1 | (P-p0) 4064 | 0 0 -1 | (p0) 4065 4066 */ 4067 for (i=0;i<reuse_solver->benign_n;i++) { 4068 const PetscScalar *vals; 4069 const PetscInt *idxs,*idxs_zero; 4070 PetscInt n,j,nz; 4071 4072 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4073 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4074 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4075 for (j=0;j<n;j++) { 4076 PetscScalar val = vals[j]; 4077 PetscInt k,col = idxs[j]; 4078 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4079 } 4080 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4081 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4082 } 4083 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4084 } 4085 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4086 Brhs = A_RV; 4087 } else { 4088 Mat tA_RVT,A_RVT; 4089 4090 if (!pcbddc->symmetric_primal) { 4091 /* A_RV already scaled by -1 */ 4092 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4093 } else { 4094 restoreavr = PETSC_TRUE; 4095 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4096 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4097 A_RVT = A_VR; 4098 } 4099 if (lda_rhs != n_R) { 4100 PetscScalar *aa; 4101 PetscInt r,*ii,*jj; 4102 PetscBool done; 4103 4104 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4105 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4106 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4107 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4108 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4109 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4110 } else { 4111 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4112 tA_RVT = A_RVT; 4113 } 4114 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4115 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4116 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4117 } 4118 if (F) { 4119 /* need to correct the rhs */ 4120 if (need_benign_correction) { 4121 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4122 PetscScalar *marr; 4123 4124 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4125 if (lda_rhs != n_R) { 4126 for (i=0;i<n_vertices;i++) { 4127 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4128 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4129 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4130 } 4131 } else { 4132 for (i=0;i<n_vertices;i++) { 4133 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4134 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4135 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4136 } 4137 } 4138 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4139 } 4140 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4141 if (restoreavr) { 4142 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4143 } 4144 /* need to correct the solution */ 4145 if (need_benign_correction) { 4146 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4147 PetscScalar *marr; 4148 4149 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4150 if (lda_rhs != n_R) { 4151 for (i=0;i<n_vertices;i++) { 4152 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4153 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4154 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4155 } 4156 } else { 4157 for (i=0;i<n_vertices;i++) { 4158 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4159 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4160 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4161 } 4162 } 4163 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4164 } 4165 } else { 4166 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4167 for (i=0;i<n_vertices;i++) { 4168 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4169 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4170 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4171 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4172 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4173 } 4174 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4175 } 4176 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4177 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4178 /* S_VV and S_CV */ 4179 if (n_constraints) { 4180 Mat B; 4181 4182 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4183 for (i=0;i<n_vertices;i++) { 4184 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4185 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4186 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4187 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4188 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4189 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4190 } 4191 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4192 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4193 ierr = MatDestroy(&B);CHKERRQ(ierr); 4194 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4195 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4196 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4197 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4198 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4199 ierr = MatDestroy(&B);CHKERRQ(ierr); 4200 } 4201 if (lda_rhs != n_R) { 4202 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4204 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4205 } 4206 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4207 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4208 if (need_benign_correction) { 4209 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4210 PetscScalar *marr,*sums; 4211 4212 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4213 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4214 for (i=0;i<reuse_solver->benign_n;i++) { 4215 const PetscScalar *vals; 4216 const PetscInt *idxs,*idxs_zero; 4217 PetscInt n,j,nz; 4218 4219 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4220 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4221 for (j=0;j<n_vertices;j++) { 4222 PetscInt k; 4223 sums[j] = 0.; 4224 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4225 } 4226 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4227 for (j=0;j<n;j++) { 4228 PetscScalar val = vals[j]; 4229 PetscInt k; 4230 for (k=0;k<n_vertices;k++) { 4231 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4232 } 4233 } 4234 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4235 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4236 } 4237 ierr = PetscFree(sums);CHKERRQ(ierr); 4238 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4239 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4240 } 4241 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4242 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4243 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4244 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4245 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4246 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4247 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4248 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4249 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4250 } else { 4251 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4252 } 4253 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4254 4255 /* coarse basis functions */ 4256 for (i=0;i<n_vertices;i++) { 4257 PetscScalar *y; 4258 4259 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4260 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4261 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4262 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4263 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4264 y[n_B*i+idx_V_B[i]] = 1.0; 4265 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4266 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4267 4268 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4269 PetscInt j; 4270 4271 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4272 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4273 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4274 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4275 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4276 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4277 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4278 } 4279 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4280 } 4281 /* if n_R == 0 the object is not destroyed */ 4282 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4283 } 4284 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4285 4286 if (n_constraints) { 4287 Mat B; 4288 4289 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4290 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4291 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4292 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4293 if (n_vertices) { 4294 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4295 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4296 } else { 4297 Mat S_VCt; 4298 4299 if (lda_rhs != n_R) { 4300 ierr = MatDestroy(&B);CHKERRQ(ierr); 4301 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4302 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4303 } 4304 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4305 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4306 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4307 } 4308 } 4309 ierr = MatDestroy(&B);CHKERRQ(ierr); 4310 /* coarse basis functions */ 4311 for (i=0;i<n_constraints;i++) { 4312 PetscScalar *y; 4313 4314 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4315 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4316 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4317 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4318 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4319 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4320 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4321 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4322 PetscInt j; 4323 4324 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4325 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4326 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4327 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4328 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4329 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4330 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4331 } 4332 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4333 } 4334 } 4335 if (n_constraints) { 4336 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4337 } 4338 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4339 4340 /* coarse matrix entries relative to B_0 */ 4341 if (pcbddc->benign_n) { 4342 Mat B0_B,B0_BPHI; 4343 IS is_dummy; 4344 PetscScalar *data; 4345 PetscInt j; 4346 4347 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4348 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4349 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4350 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4351 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4352 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4353 for (j=0;j<pcbddc->benign_n;j++) { 4354 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4355 for (i=0;i<pcbddc->local_primal_size;i++) { 4356 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4357 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4358 } 4359 } 4360 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4361 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4362 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4363 } 4364 4365 /* compute other basis functions for non-symmetric problems */ 4366 if (!pcbddc->symmetric_primal) { 4367 Mat B_V=NULL,B_C=NULL; 4368 PetscScalar *marray; 4369 4370 if (n_constraints) { 4371 Mat S_CCT,C_CRT; 4372 4373 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4374 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4375 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4376 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4377 if (n_vertices) { 4378 Mat S_VCT; 4379 4380 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4381 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4382 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4383 } 4384 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4385 } else { 4386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4387 } 4388 if (n_vertices && n_R) { 4389 PetscScalar *av,*marray; 4390 const PetscInt *xadj,*adjncy; 4391 PetscInt n; 4392 PetscBool flg_row; 4393 4394 /* B_V = B_V - A_VR^T */ 4395 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4396 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4397 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4398 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4399 for (i=0;i<n;i++) { 4400 PetscInt j; 4401 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4402 } 4403 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4404 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4405 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4406 } 4407 4408 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4409 if (n_vertices) { 4410 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4411 for (i=0;i<n_vertices;i++) { 4412 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4413 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4414 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4415 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4416 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4417 } 4418 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4419 } 4420 if (B_C) { 4421 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4422 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4423 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4424 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4425 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4426 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4427 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4428 } 4429 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4430 } 4431 /* coarse basis functions */ 4432 for (i=0;i<pcbddc->local_primal_size;i++) { 4433 PetscScalar *y; 4434 4435 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4436 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4437 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4438 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4439 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4440 if (i<n_vertices) { 4441 y[n_B*i+idx_V_B[i]] = 1.0; 4442 } 4443 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4444 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4445 4446 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4447 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4448 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4449 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4450 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4451 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4452 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4453 } 4454 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4455 } 4456 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4457 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4458 } 4459 4460 /* free memory */ 4461 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4462 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4463 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4464 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4465 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4466 ierr = PetscFree(work);CHKERRQ(ierr); 4467 if (n_vertices) { 4468 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4469 } 4470 if (n_constraints) { 4471 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4472 } 4473 /* Checking coarse_sub_mat and coarse basis functios */ 4474 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4475 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4476 if (pcbddc->dbg_flag) { 4477 Mat coarse_sub_mat; 4478 Mat AUXMAT,TM1,TM2,TM3,TM4; 4479 Mat coarse_phi_D,coarse_phi_B; 4480 Mat coarse_psi_D,coarse_psi_B; 4481 Mat A_II,A_BB,A_IB,A_BI; 4482 Mat C_B,CPHI; 4483 IS is_dummy; 4484 Vec mones; 4485 MatType checkmattype=MATSEQAIJ; 4486 PetscReal real_value; 4487 4488 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4489 Mat A; 4490 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4491 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4492 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4493 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4494 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4495 ierr = MatDestroy(&A);CHKERRQ(ierr); 4496 } else { 4497 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4498 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4499 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4500 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4501 } 4502 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4503 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4504 if (!pcbddc->symmetric_primal) { 4505 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4506 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4507 } 4508 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4509 4510 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4511 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4512 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4513 if (!pcbddc->symmetric_primal) { 4514 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4515 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4516 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4517 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4518 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4519 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4520 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4521 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4522 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4523 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4524 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4525 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4526 } else { 4527 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4528 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4529 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4530 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4531 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4532 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4533 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4534 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4535 } 4536 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4537 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4538 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4539 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4540 if (pcbddc->benign_n) { 4541 Mat B0_B,B0_BPHI; 4542 PetscScalar *data,*data2; 4543 PetscInt j; 4544 4545 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4546 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4547 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4548 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4549 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4550 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4551 for (j=0;j<pcbddc->benign_n;j++) { 4552 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4553 for (i=0;i<pcbddc->local_primal_size;i++) { 4554 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4555 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4556 } 4557 } 4558 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4559 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4560 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4561 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4562 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4563 } 4564 #if 0 4565 { 4566 PetscViewer viewer; 4567 char filename[256]; 4568 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4569 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4570 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4571 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4572 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4573 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4574 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4575 if (pcbddc->coarse_phi_B) { 4576 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4577 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4578 } 4579 if (pcbddc->coarse_phi_D) { 4580 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4581 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4582 } 4583 if (pcbddc->coarse_psi_B) { 4584 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4585 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4586 } 4587 if (pcbddc->coarse_psi_D) { 4588 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4589 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4590 } 4591 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4592 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4593 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4594 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4595 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4596 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4597 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4598 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4599 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4600 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4601 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4602 } 4603 #endif 4604 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4605 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4606 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4607 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4608 4609 /* check constraints */ 4610 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4611 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4612 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4613 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4614 } else { 4615 PetscScalar *data; 4616 Mat tmat; 4617 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4619 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4620 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4621 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4622 } 4623 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4624 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4625 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4626 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4627 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4628 if (!pcbddc->symmetric_primal) { 4629 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4630 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4631 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4632 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4633 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4634 } 4635 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4636 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4637 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4638 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4639 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4640 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4641 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4642 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4643 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4644 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4645 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4646 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4647 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4648 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4649 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4650 if (!pcbddc->symmetric_primal) { 4651 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4652 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4653 } 4654 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4655 } 4656 /* get back data */ 4657 *coarse_submat_vals_n = coarse_submat_vals; 4658 PetscFunctionReturn(0); 4659 } 4660 4661 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4662 { 4663 Mat *work_mat; 4664 IS isrow_s,iscol_s; 4665 PetscBool rsorted,csorted; 4666 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4667 PetscErrorCode ierr; 4668 4669 PetscFunctionBegin; 4670 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4671 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4672 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4673 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4674 4675 if (!rsorted) { 4676 const PetscInt *idxs; 4677 PetscInt *idxs_sorted,i; 4678 4679 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4680 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4681 for (i=0;i<rsize;i++) { 4682 idxs_perm_r[i] = i; 4683 } 4684 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4685 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4686 for (i=0;i<rsize;i++) { 4687 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4688 } 4689 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4690 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4691 } else { 4692 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4693 isrow_s = isrow; 4694 } 4695 4696 if (!csorted) { 4697 if (isrow == iscol) { 4698 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4699 iscol_s = isrow_s; 4700 } else { 4701 const PetscInt *idxs; 4702 PetscInt *idxs_sorted,i; 4703 4704 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4705 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4706 for (i=0;i<csize;i++) { 4707 idxs_perm_c[i] = i; 4708 } 4709 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4710 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4711 for (i=0;i<csize;i++) { 4712 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4713 } 4714 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4715 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4716 } 4717 } else { 4718 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4719 iscol_s = iscol; 4720 } 4721 4722 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4723 4724 if (!rsorted || !csorted) { 4725 Mat new_mat; 4726 IS is_perm_r,is_perm_c; 4727 4728 if (!rsorted) { 4729 PetscInt *idxs_r,i; 4730 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4731 for (i=0;i<rsize;i++) { 4732 idxs_r[idxs_perm_r[i]] = i; 4733 } 4734 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4735 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4736 } else { 4737 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4738 } 4739 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4740 4741 if (!csorted) { 4742 if (isrow_s == iscol_s) { 4743 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4744 is_perm_c = is_perm_r; 4745 } else { 4746 PetscInt *idxs_c,i; 4747 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4748 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4749 for (i=0;i<csize;i++) { 4750 idxs_c[idxs_perm_c[i]] = i; 4751 } 4752 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4753 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4754 } 4755 } else { 4756 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4757 } 4758 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4759 4760 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4761 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4762 work_mat[0] = new_mat; 4763 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4764 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4765 } 4766 4767 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4768 *B = work_mat[0]; 4769 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4770 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4771 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4772 PetscFunctionReturn(0); 4773 } 4774 4775 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4776 { 4777 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4778 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4779 Mat new_mat,lA; 4780 IS is_local,is_global; 4781 PetscInt local_size; 4782 PetscBool isseqaij; 4783 PetscErrorCode ierr; 4784 4785 PetscFunctionBegin; 4786 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4787 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4788 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4789 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4790 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4791 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4792 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4793 4794 /* check */ 4795 if (pcbddc->dbg_flag) { 4796 Vec x,x_change; 4797 PetscReal error; 4798 4799 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4800 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4801 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4802 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4803 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4804 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4805 if (!pcbddc->change_interior) { 4806 const PetscScalar *x,*y,*v; 4807 PetscReal lerror = 0.; 4808 PetscInt i; 4809 4810 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4811 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4812 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4813 for (i=0;i<local_size;i++) 4814 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4815 lerror = PetscAbsScalar(x[i]-y[i]); 4816 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4817 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4818 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4819 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4820 if (error > PETSC_SMALL) { 4821 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4822 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4823 } else { 4824 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4825 } 4826 } 4827 } 4828 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4829 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4830 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4831 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4832 if (error > PETSC_SMALL) { 4833 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4834 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4835 } else { 4836 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4837 } 4838 } 4839 ierr = VecDestroy(&x);CHKERRQ(ierr); 4840 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4841 } 4842 4843 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4844 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4845 4846 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4847 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4848 if (isseqaij) { 4849 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4850 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4851 if (lA) { 4852 Mat work; 4853 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4854 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4855 ierr = MatDestroy(&work);CHKERRQ(ierr); 4856 } 4857 } else { 4858 Mat work_mat; 4859 4860 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4861 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4862 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4863 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4864 if (lA) { 4865 Mat work; 4866 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4867 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4868 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4869 ierr = MatDestroy(&work);CHKERRQ(ierr); 4870 } 4871 } 4872 if (matis->A->symmetric_set) { 4873 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4874 #if !defined(PETSC_USE_COMPLEX) 4875 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4876 #endif 4877 } 4878 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4879 PetscFunctionReturn(0); 4880 } 4881 4882 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4883 { 4884 PC_IS* pcis = (PC_IS*)(pc->data); 4885 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4886 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4887 PetscInt *idx_R_local=NULL; 4888 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4889 PetscInt vbs,bs; 4890 PetscBT bitmask=NULL; 4891 PetscErrorCode ierr; 4892 4893 PetscFunctionBegin; 4894 /* 4895 No need to setup local scatters if 4896 - primal space is unchanged 4897 AND 4898 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4899 AND 4900 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4901 */ 4902 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4903 PetscFunctionReturn(0); 4904 } 4905 /* destroy old objects */ 4906 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4907 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4908 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4909 /* Set Non-overlapping dimensions */ 4910 n_B = pcis->n_B; 4911 n_D = pcis->n - n_B; 4912 n_vertices = pcbddc->n_vertices; 4913 4914 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4915 4916 /* create auxiliary bitmask and allocate workspace */ 4917 if (!sub_schurs || !sub_schurs->reuse_solver) { 4918 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4919 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4920 for (i=0;i<n_vertices;i++) { 4921 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4922 } 4923 4924 for (i=0, n_R=0; i<pcis->n; i++) { 4925 if (!PetscBTLookup(bitmask,i)) { 4926 idx_R_local[n_R++] = i; 4927 } 4928 } 4929 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4930 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4931 4932 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4933 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4934 } 4935 4936 /* Block code */ 4937 vbs = 1; 4938 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4939 if (bs>1 && !(n_vertices%bs)) { 4940 PetscBool is_blocked = PETSC_TRUE; 4941 PetscInt *vary; 4942 if (!sub_schurs || !sub_schurs->reuse_solver) { 4943 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4944 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4945 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4946 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4947 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4948 for (i=0; i<pcis->n/bs; i++) { 4949 if (vary[i]!=0 && vary[i]!=bs) { 4950 is_blocked = PETSC_FALSE; 4951 break; 4952 } 4953 } 4954 ierr = PetscFree(vary);CHKERRQ(ierr); 4955 } else { 4956 /* Verify directly the R set */ 4957 for (i=0; i<n_R/bs; i++) { 4958 PetscInt j,node=idx_R_local[bs*i]; 4959 for (j=1; j<bs; j++) { 4960 if (node != idx_R_local[bs*i+j]-j) { 4961 is_blocked = PETSC_FALSE; 4962 break; 4963 } 4964 } 4965 } 4966 } 4967 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4968 vbs = bs; 4969 for (i=0;i<n_R/vbs;i++) { 4970 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4971 } 4972 } 4973 } 4974 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4975 if (sub_schurs && sub_schurs->reuse_solver) { 4976 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4977 4978 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4979 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4980 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4981 reuse_solver->is_R = pcbddc->is_R_local; 4982 } else { 4983 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4984 } 4985 4986 /* print some info if requested */ 4987 if (pcbddc->dbg_flag) { 4988 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4989 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4990 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4991 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4992 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4993 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4995 } 4996 4997 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4998 if (!sub_schurs || !sub_schurs->reuse_solver) { 4999 IS is_aux1,is_aux2; 5000 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5001 5002 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5003 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5004 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5005 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5006 for (i=0; i<n_D; i++) { 5007 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5008 } 5009 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5010 for (i=0, j=0; i<n_R; i++) { 5011 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5012 aux_array1[j++] = i; 5013 } 5014 } 5015 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5016 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5017 for (i=0, j=0; i<n_B; i++) { 5018 if (!PetscBTLookup(bitmask,is_indices[i])) { 5019 aux_array2[j++] = i; 5020 } 5021 } 5022 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5023 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5024 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5025 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5026 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5027 5028 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5029 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5030 for (i=0, j=0; i<n_R; i++) { 5031 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5032 aux_array1[j++] = i; 5033 } 5034 } 5035 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5036 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5037 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5038 } 5039 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5040 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5041 } else { 5042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5043 IS tis; 5044 PetscInt schur_size; 5045 5046 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5047 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5048 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5049 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5050 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5051 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5052 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5053 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5054 } 5055 } 5056 PetscFunctionReturn(0); 5057 } 5058 5059 5060 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5061 { 5062 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5063 PC_IS *pcis = (PC_IS*)pc->data; 5064 PC pc_temp; 5065 Mat A_RR; 5066 MatReuse reuse; 5067 PetscScalar m_one = -1.0; 5068 PetscReal value; 5069 PetscInt n_D,n_R; 5070 PetscBool check_corr,issbaij; 5071 PetscErrorCode ierr; 5072 /* prefixes stuff */ 5073 char dir_prefix[256],neu_prefix[256],str_level[16]; 5074 size_t len; 5075 5076 PetscFunctionBegin; 5077 5078 /* compute prefixes */ 5079 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5080 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5081 if (!pcbddc->current_level) { 5082 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5083 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5084 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5085 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5086 } else { 5087 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5088 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5089 len -= 15; /* remove "pc_bddc_coarse_" */ 5090 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5091 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5092 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5093 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5094 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5095 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5096 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 5097 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5098 } 5099 5100 /* DIRICHLET PROBLEM */ 5101 if (dirichlet) { 5102 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5103 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5104 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5105 if (pcbddc->dbg_flag) { 5106 Mat A_IIn; 5107 5108 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5109 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5110 pcis->A_II = A_IIn; 5111 } 5112 } 5113 if (pcbddc->local_mat->symmetric_set) { 5114 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5115 } 5116 /* Matrix for Dirichlet problem is pcis->A_II */ 5117 n_D = pcis->n - pcis->n_B; 5118 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5119 void (*f)(void) = 0; 5120 5121 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5122 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5123 /* default */ 5124 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5125 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5126 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5127 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5128 if (issbaij) { 5129 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5130 } else { 5131 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5132 } 5133 /* Allow user's customization */ 5134 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5135 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5136 if (f && pcbddc->mat_graph->cloc) { 5137 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5138 const PetscInt *idxs; 5139 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5140 5141 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5142 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5143 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5144 for (i=0;i<nl;i++) { 5145 for (d=0;d<cdim;d++) { 5146 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5147 } 5148 } 5149 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5150 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5151 ierr = PetscFree(scoords);CHKERRQ(ierr); 5152 } 5153 } 5154 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5155 if (sub_schurs && sub_schurs->reuse_solver) { 5156 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5157 5158 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5159 } 5160 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5161 if (!n_D) { 5162 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5163 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5164 } 5165 /* set ksp_D into pcis data */ 5166 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5167 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5168 pcis->ksp_D = pcbddc->ksp_D; 5169 } 5170 5171 /* NEUMANN PROBLEM */ 5172 A_RR = 0; 5173 if (neumann) { 5174 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5175 PetscInt ibs,mbs; 5176 PetscBool issbaij, reuse_neumann_solver; 5177 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5178 5179 reuse_neumann_solver = PETSC_FALSE; 5180 if (sub_schurs && sub_schurs->reuse_solver) { 5181 IS iP; 5182 5183 reuse_neumann_solver = PETSC_TRUE; 5184 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5185 if (iP) reuse_neumann_solver = PETSC_FALSE; 5186 } 5187 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5188 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5189 if (pcbddc->ksp_R) { /* already created ksp */ 5190 PetscInt nn_R; 5191 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5192 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5193 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5194 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5195 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5196 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5197 reuse = MAT_INITIAL_MATRIX; 5198 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5199 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5200 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5201 reuse = MAT_INITIAL_MATRIX; 5202 } else { /* safe to reuse the matrix */ 5203 reuse = MAT_REUSE_MATRIX; 5204 } 5205 } 5206 /* last check */ 5207 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5208 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5209 reuse = MAT_INITIAL_MATRIX; 5210 } 5211 } else { /* first time, so we need to create the matrix */ 5212 reuse = MAT_INITIAL_MATRIX; 5213 } 5214 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5215 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5216 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5217 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5218 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5219 if (matis->A == pcbddc->local_mat) { 5220 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5221 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5222 } else { 5223 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5224 } 5225 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5226 if (matis->A == pcbddc->local_mat) { 5227 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5228 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5229 } else { 5230 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5231 } 5232 } 5233 /* extract A_RR */ 5234 if (reuse_neumann_solver) { 5235 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5236 5237 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5238 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5239 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5240 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5241 } else { 5242 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5243 } 5244 } else { 5245 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5246 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5247 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5248 } 5249 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5250 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5251 } 5252 if (pcbddc->local_mat->symmetric_set) { 5253 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5254 } 5255 if (!pcbddc->ksp_R) { /* create object if not present */ 5256 void (*f)(void) = 0; 5257 5258 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5259 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5260 /* default */ 5261 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5262 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5263 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5264 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5265 if (issbaij) { 5266 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5267 } else { 5268 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5269 } 5270 /* Allow user's customization */ 5271 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5272 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5273 if (f && pcbddc->mat_graph->cloc) { 5274 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5275 const PetscInt *idxs; 5276 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5277 5278 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5279 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5280 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5281 for (i=0;i<nl;i++) { 5282 for (d=0;d<cdim;d++) { 5283 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5284 } 5285 } 5286 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5287 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5288 ierr = PetscFree(scoords);CHKERRQ(ierr); 5289 } 5290 } 5291 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5292 if (!n_R) { 5293 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5294 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5295 } 5296 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5297 /* Reuse solver if it is present */ 5298 if (reuse_neumann_solver) { 5299 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5300 5301 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5302 } 5303 } 5304 5305 if (pcbddc->dbg_flag) { 5306 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5307 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5308 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5309 } 5310 5311 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5312 check_corr = PETSC_FALSE; 5313 if (pcbddc->NullSpace_corr[0]) { 5314 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5315 } 5316 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5317 check_corr = PETSC_TRUE; 5318 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5319 } 5320 if (neumann && pcbddc->NullSpace_corr[2]) { 5321 check_corr = PETSC_TRUE; 5322 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5323 } 5324 /* check Dirichlet and Neumann solvers */ 5325 if (pcbddc->dbg_flag) { 5326 if (dirichlet) { /* Dirichlet */ 5327 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5328 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5329 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5330 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5331 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5332 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5333 if (check_corr) { 5334 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5335 } 5336 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5337 } 5338 if (neumann) { /* Neumann */ 5339 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5340 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5341 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5342 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5343 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5345 if (check_corr) { 5346 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5347 } 5348 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5349 } 5350 } 5351 /* free Neumann problem's matrix */ 5352 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5353 PetscFunctionReturn(0); 5354 } 5355 5356 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5357 { 5358 PetscErrorCode ierr; 5359 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5360 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5361 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5362 5363 PetscFunctionBegin; 5364 if (!reuse_solver) { 5365 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5366 } 5367 if (!pcbddc->switch_static) { 5368 if (applytranspose && pcbddc->local_auxmat1) { 5369 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5370 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5371 } 5372 if (!reuse_solver) { 5373 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5374 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5375 } else { 5376 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5377 5378 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5379 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5380 } 5381 } else { 5382 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5383 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5384 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5385 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5386 if (applytranspose && pcbddc->local_auxmat1) { 5387 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5388 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5389 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5390 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5391 } 5392 } 5393 if (!reuse_solver || pcbddc->switch_static) { 5394 if (applytranspose) { 5395 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5396 } else { 5397 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5398 } 5399 } else { 5400 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5401 5402 if (applytranspose) { 5403 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5404 } else { 5405 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5406 } 5407 } 5408 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5409 if (!pcbddc->switch_static) { 5410 if (!reuse_solver) { 5411 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5412 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5413 } else { 5414 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5415 5416 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5417 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5418 } 5419 if (!applytranspose && pcbddc->local_auxmat1) { 5420 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5421 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5422 } 5423 } else { 5424 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5425 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5426 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5427 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5428 if (!applytranspose && pcbddc->local_auxmat1) { 5429 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5430 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5431 } 5432 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5433 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5434 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5435 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5436 } 5437 PetscFunctionReturn(0); 5438 } 5439 5440 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5441 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5442 { 5443 PetscErrorCode ierr; 5444 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5445 PC_IS* pcis = (PC_IS*) (pc->data); 5446 const PetscScalar zero = 0.0; 5447 5448 PetscFunctionBegin; 5449 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5450 if (!pcbddc->benign_apply_coarse_only) { 5451 if (applytranspose) { 5452 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5453 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5454 } else { 5455 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5456 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5457 } 5458 } else { 5459 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5460 } 5461 5462 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5463 if (pcbddc->benign_n) { 5464 PetscScalar *array; 5465 PetscInt j; 5466 5467 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5468 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5469 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5470 } 5471 5472 /* start communications from local primal nodes to rhs of coarse solver */ 5473 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5474 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5475 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5476 5477 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5478 if (pcbddc->coarse_ksp) { 5479 Mat coarse_mat; 5480 Vec rhs,sol; 5481 MatNullSpace nullsp; 5482 PetscBool isbddc = PETSC_FALSE; 5483 5484 if (pcbddc->benign_have_null) { 5485 PC coarse_pc; 5486 5487 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5488 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5489 /* we need to propagate to coarser levels the need for a possible benign correction */ 5490 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5491 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5492 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5493 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5494 } 5495 } 5496 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5497 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5498 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5499 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5500 if (nullsp) { 5501 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5502 } 5503 if (applytranspose) { 5504 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5505 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5506 } else { 5507 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5508 PC coarse_pc; 5509 5510 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5511 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5512 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5513 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5514 } else { 5515 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5516 } 5517 } 5518 /* we don't need the benign correction at coarser levels anymore */ 5519 if (pcbddc->benign_have_null && isbddc) { 5520 PC coarse_pc; 5521 PC_BDDC* coarsepcbddc; 5522 5523 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5524 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5525 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5526 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5527 } 5528 if (nullsp) { 5529 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5530 } 5531 } 5532 5533 /* Local solution on R nodes */ 5534 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5535 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5536 } 5537 /* communications from coarse sol to local primal nodes */ 5538 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5539 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5540 5541 /* Sum contributions from the two levels */ 5542 if (!pcbddc->benign_apply_coarse_only) { 5543 if (applytranspose) { 5544 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5545 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5546 } else { 5547 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5548 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5549 } 5550 /* store p0 */ 5551 if (pcbddc->benign_n) { 5552 PetscScalar *array; 5553 PetscInt j; 5554 5555 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5556 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5557 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5558 } 5559 } else { /* expand the coarse solution */ 5560 if (applytranspose) { 5561 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5562 } else { 5563 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5564 } 5565 } 5566 PetscFunctionReturn(0); 5567 } 5568 5569 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5570 { 5571 PetscErrorCode ierr; 5572 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5573 PetscScalar *array; 5574 Vec from,to; 5575 5576 PetscFunctionBegin; 5577 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5578 from = pcbddc->coarse_vec; 5579 to = pcbddc->vec1_P; 5580 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5581 Vec tvec; 5582 5583 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5584 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5585 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5586 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5587 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5588 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5589 } 5590 } else { /* from local to global -> put data in coarse right hand side */ 5591 from = pcbddc->vec1_P; 5592 to = pcbddc->coarse_vec; 5593 } 5594 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5595 PetscFunctionReturn(0); 5596 } 5597 5598 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5599 { 5600 PetscErrorCode ierr; 5601 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5602 PetscScalar *array; 5603 Vec from,to; 5604 5605 PetscFunctionBegin; 5606 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5607 from = pcbddc->coarse_vec; 5608 to = pcbddc->vec1_P; 5609 } else { /* from local to global -> put data in coarse right hand side */ 5610 from = pcbddc->vec1_P; 5611 to = pcbddc->coarse_vec; 5612 } 5613 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5614 if (smode == SCATTER_FORWARD) { 5615 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5616 Vec tvec; 5617 5618 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5619 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5620 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5621 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5622 } 5623 } else { 5624 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5625 ierr = VecResetArray(from);CHKERRQ(ierr); 5626 } 5627 } 5628 PetscFunctionReturn(0); 5629 } 5630 5631 /* uncomment for testing purposes */ 5632 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5633 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5634 { 5635 PetscErrorCode ierr; 5636 PC_IS* pcis = (PC_IS*)(pc->data); 5637 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5638 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5639 /* one and zero */ 5640 PetscScalar one=1.0,zero=0.0; 5641 /* space to store constraints and their local indices */ 5642 PetscScalar *constraints_data; 5643 PetscInt *constraints_idxs,*constraints_idxs_B; 5644 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5645 PetscInt *constraints_n; 5646 /* iterators */ 5647 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5648 /* BLAS integers */ 5649 PetscBLASInt lwork,lierr; 5650 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5651 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5652 /* reuse */ 5653 PetscInt olocal_primal_size,olocal_primal_size_cc; 5654 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5655 /* change of basis */ 5656 PetscBool qr_needed; 5657 PetscBT change_basis,qr_needed_idx; 5658 /* auxiliary stuff */ 5659 PetscInt *nnz,*is_indices; 5660 PetscInt ncc; 5661 /* some quantities */ 5662 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5663 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5664 PetscReal tol; /* tolerance for retaining eigenmodes */ 5665 5666 PetscFunctionBegin; 5667 tol = PetscSqrtReal(PETSC_SMALL); 5668 /* Destroy Mat objects computed previously */ 5669 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5670 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5671 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5672 /* save info on constraints from previous setup (if any) */ 5673 olocal_primal_size = pcbddc->local_primal_size; 5674 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5675 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5676 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5677 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5678 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5679 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5680 5681 if (!pcbddc->adaptive_selection) { 5682 IS ISForVertices,*ISForFaces,*ISForEdges; 5683 MatNullSpace nearnullsp; 5684 const Vec *nearnullvecs; 5685 Vec *localnearnullsp; 5686 PetscScalar *array; 5687 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5688 PetscBool nnsp_has_cnst; 5689 /* LAPACK working arrays for SVD or POD */ 5690 PetscBool skip_lapack,boolforchange; 5691 PetscScalar *work; 5692 PetscReal *singular_vals; 5693 #if defined(PETSC_USE_COMPLEX) 5694 PetscReal *rwork; 5695 #endif 5696 #if defined(PETSC_MISSING_LAPACK_GESVD) 5697 PetscScalar *temp_basis,*correlation_mat; 5698 #else 5699 PetscBLASInt dummy_int=1; 5700 PetscScalar dummy_scalar=1.; 5701 #endif 5702 5703 /* Get index sets for faces, edges and vertices from graph */ 5704 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5705 /* print some info */ 5706 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5707 PetscInt nv; 5708 5709 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5710 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5711 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5712 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5713 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5715 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5716 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5717 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5718 } 5719 5720 /* free unneeded index sets */ 5721 if (!pcbddc->use_vertices) { 5722 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5723 } 5724 if (!pcbddc->use_edges) { 5725 for (i=0;i<n_ISForEdges;i++) { 5726 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5727 } 5728 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5729 n_ISForEdges = 0; 5730 } 5731 if (!pcbddc->use_faces) { 5732 for (i=0;i<n_ISForFaces;i++) { 5733 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5734 } 5735 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5736 n_ISForFaces = 0; 5737 } 5738 5739 /* check if near null space is attached to global mat */ 5740 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5741 if (nearnullsp) { 5742 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5743 /* remove any stored info */ 5744 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5745 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5746 /* store information for BDDC solver reuse */ 5747 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5748 pcbddc->onearnullspace = nearnullsp; 5749 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5750 for (i=0;i<nnsp_size;i++) { 5751 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5752 } 5753 } else { /* if near null space is not provided BDDC uses constants by default */ 5754 nnsp_size = 0; 5755 nnsp_has_cnst = PETSC_TRUE; 5756 } 5757 /* get max number of constraints on a single cc */ 5758 max_constraints = nnsp_size; 5759 if (nnsp_has_cnst) max_constraints++; 5760 5761 /* 5762 Evaluate maximum storage size needed by the procedure 5763 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5764 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5765 There can be multiple constraints per connected component 5766 */ 5767 n_vertices = 0; 5768 if (ISForVertices) { 5769 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5770 } 5771 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5772 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5773 5774 total_counts = n_ISForFaces+n_ISForEdges; 5775 total_counts *= max_constraints; 5776 total_counts += n_vertices; 5777 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5778 5779 total_counts = 0; 5780 max_size_of_constraint = 0; 5781 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5782 IS used_is; 5783 if (i<n_ISForEdges) { 5784 used_is = ISForEdges[i]; 5785 } else { 5786 used_is = ISForFaces[i-n_ISForEdges]; 5787 } 5788 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5789 total_counts += j; 5790 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5791 } 5792 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5793 5794 /* get local part of global near null space vectors */ 5795 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5796 for (k=0;k<nnsp_size;k++) { 5797 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5798 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5799 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5800 } 5801 5802 /* whether or not to skip lapack calls */ 5803 skip_lapack = PETSC_TRUE; 5804 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5805 5806 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5807 if (!skip_lapack) { 5808 PetscScalar temp_work; 5809 5810 #if defined(PETSC_MISSING_LAPACK_GESVD) 5811 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5812 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5813 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5814 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5815 #if defined(PETSC_USE_COMPLEX) 5816 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5817 #endif 5818 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5819 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5820 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5821 lwork = -1; 5822 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5823 #if !defined(PETSC_USE_COMPLEX) 5824 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5825 #else 5826 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5827 #endif 5828 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5829 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5830 #else /* on missing GESVD */ 5831 /* SVD */ 5832 PetscInt max_n,min_n; 5833 max_n = max_size_of_constraint; 5834 min_n = max_constraints; 5835 if (max_size_of_constraint < max_constraints) { 5836 min_n = max_size_of_constraint; 5837 max_n = max_constraints; 5838 } 5839 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5840 #if defined(PETSC_USE_COMPLEX) 5841 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5842 #endif 5843 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5844 lwork = -1; 5845 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5847 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5848 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5849 #if !defined(PETSC_USE_COMPLEX) 5850 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5851 #else 5852 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5853 #endif 5854 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5855 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5856 #endif /* on missing GESVD */ 5857 /* Allocate optimal workspace */ 5858 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5859 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5860 } 5861 /* Now we can loop on constraining sets */ 5862 total_counts = 0; 5863 constraints_idxs_ptr[0] = 0; 5864 constraints_data_ptr[0] = 0; 5865 /* vertices */ 5866 if (n_vertices) { 5867 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5868 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5869 for (i=0;i<n_vertices;i++) { 5870 constraints_n[total_counts] = 1; 5871 constraints_data[total_counts] = 1.0; 5872 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5873 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5874 total_counts++; 5875 } 5876 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5877 n_vertices = total_counts; 5878 } 5879 5880 /* edges and faces */ 5881 total_counts_cc = total_counts; 5882 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5883 IS used_is; 5884 PetscBool idxs_copied = PETSC_FALSE; 5885 5886 if (ncc<n_ISForEdges) { 5887 used_is = ISForEdges[ncc]; 5888 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5889 } else { 5890 used_is = ISForFaces[ncc-n_ISForEdges]; 5891 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5892 } 5893 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5894 5895 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5896 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5897 /* change of basis should not be performed on local periodic nodes */ 5898 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5899 if (nnsp_has_cnst) { 5900 PetscScalar quad_value; 5901 5902 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5903 idxs_copied = PETSC_TRUE; 5904 5905 if (!pcbddc->use_nnsp_true) { 5906 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5907 } else { 5908 quad_value = 1.0; 5909 } 5910 for (j=0;j<size_of_constraint;j++) { 5911 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5912 } 5913 temp_constraints++; 5914 total_counts++; 5915 } 5916 for (k=0;k<nnsp_size;k++) { 5917 PetscReal real_value; 5918 PetscScalar *ptr_to_data; 5919 5920 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5921 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5922 for (j=0;j<size_of_constraint;j++) { 5923 ptr_to_data[j] = array[is_indices[j]]; 5924 } 5925 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5926 /* check if array is null on the connected component */ 5927 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5928 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5929 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 5930 temp_constraints++; 5931 total_counts++; 5932 if (!idxs_copied) { 5933 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5934 idxs_copied = PETSC_TRUE; 5935 } 5936 } 5937 } 5938 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5939 valid_constraints = temp_constraints; 5940 if (!pcbddc->use_nnsp_true && temp_constraints) { 5941 if (temp_constraints == 1) { /* just normalize the constraint */ 5942 PetscScalar norm,*ptr_to_data; 5943 5944 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5945 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5946 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5947 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5948 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5949 } else { /* perform SVD */ 5950 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5951 5952 #if defined(PETSC_MISSING_LAPACK_GESVD) 5953 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5954 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5955 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5956 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5957 from that computed using LAPACKgesvd 5958 -> This is due to a different computation of eigenvectors in LAPACKheev 5959 -> The quality of the POD-computed basis will be the same */ 5960 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5961 /* Store upper triangular part of correlation matrix */ 5962 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5963 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5964 for (j=0;j<temp_constraints;j++) { 5965 for (k=0;k<j+1;k++) { 5966 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5967 } 5968 } 5969 /* compute eigenvalues and eigenvectors of correlation matrix */ 5970 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5971 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5972 #if !defined(PETSC_USE_COMPLEX) 5973 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5974 #else 5975 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5976 #endif 5977 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5978 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5979 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5980 j = 0; 5981 while (j < temp_constraints && singular_vals[j] < tol) j++; 5982 total_counts = total_counts-j; 5983 valid_constraints = temp_constraints-j; 5984 /* scale and copy POD basis into used quadrature memory */ 5985 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5986 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5987 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5988 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5989 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5991 if (j<temp_constraints) { 5992 PetscInt ii; 5993 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5994 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5995 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5996 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5997 for (k=0;k<temp_constraints-j;k++) { 5998 for (ii=0;ii<size_of_constraint;ii++) { 5999 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6000 } 6001 } 6002 } 6003 #else /* on missing GESVD */ 6004 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6005 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6006 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6007 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6008 #if !defined(PETSC_USE_COMPLEX) 6009 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6010 #else 6011 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6012 #endif 6013 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6014 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6015 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6016 k = temp_constraints; 6017 if (k > size_of_constraint) k = size_of_constraint; 6018 j = 0; 6019 while (j < k && singular_vals[k-j-1] < tol) j++; 6020 valid_constraints = k-j; 6021 total_counts = total_counts-temp_constraints+valid_constraints; 6022 #endif /* on missing GESVD */ 6023 } 6024 } 6025 /* update pointers information */ 6026 if (valid_constraints) { 6027 constraints_n[total_counts_cc] = valid_constraints; 6028 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6029 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6030 /* set change_of_basis flag */ 6031 if (boolforchange) { 6032 PetscBTSet(change_basis,total_counts_cc); 6033 } 6034 total_counts_cc++; 6035 } 6036 } 6037 /* free workspace */ 6038 if (!skip_lapack) { 6039 ierr = PetscFree(work);CHKERRQ(ierr); 6040 #if defined(PETSC_USE_COMPLEX) 6041 ierr = PetscFree(rwork);CHKERRQ(ierr); 6042 #endif 6043 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6044 #if defined(PETSC_MISSING_LAPACK_GESVD) 6045 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6046 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6047 #endif 6048 } 6049 for (k=0;k<nnsp_size;k++) { 6050 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6051 } 6052 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6053 /* free index sets of faces, edges and vertices */ 6054 for (i=0;i<n_ISForFaces;i++) { 6055 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6056 } 6057 if (n_ISForFaces) { 6058 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6059 } 6060 for (i=0;i<n_ISForEdges;i++) { 6061 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6062 } 6063 if (n_ISForEdges) { 6064 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6065 } 6066 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6067 } else { 6068 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6069 6070 total_counts = 0; 6071 n_vertices = 0; 6072 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6073 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6074 } 6075 max_constraints = 0; 6076 total_counts_cc = 0; 6077 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6078 total_counts += pcbddc->adaptive_constraints_n[i]; 6079 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6080 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6081 } 6082 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6083 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6084 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6085 constraints_data = pcbddc->adaptive_constraints_data; 6086 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6087 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6088 total_counts_cc = 0; 6089 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6090 if (pcbddc->adaptive_constraints_n[i]) { 6091 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6092 } 6093 } 6094 #if 0 6095 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6096 for (i=0;i<total_counts_cc;i++) { 6097 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6098 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6099 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6100 printf(" %d",constraints_idxs[j]); 6101 } 6102 printf("\n"); 6103 printf("number of cc: %d\n",constraints_n[i]); 6104 } 6105 for (i=0;i<n_vertices;i++) { 6106 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6107 } 6108 for (i=0;i<sub_schurs->n_subs;i++) { 6109 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6110 } 6111 #endif 6112 6113 max_size_of_constraint = 0; 6114 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6115 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6116 /* Change of basis */ 6117 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6118 if (pcbddc->use_change_of_basis) { 6119 for (i=0;i<sub_schurs->n_subs;i++) { 6120 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6121 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6122 } 6123 } 6124 } 6125 } 6126 pcbddc->local_primal_size = total_counts; 6127 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6128 6129 /* map constraints_idxs in boundary numbering */ 6130 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6131 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6132 6133 /* Create constraint matrix */ 6134 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6135 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6136 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6137 6138 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6139 /* determine if a QR strategy is needed for change of basis */ 6140 qr_needed = PETSC_FALSE; 6141 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6142 total_primal_vertices=0; 6143 pcbddc->local_primal_size_cc = 0; 6144 for (i=0;i<total_counts_cc;i++) { 6145 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6146 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6147 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6148 pcbddc->local_primal_size_cc += 1; 6149 } else if (PetscBTLookup(change_basis,i)) { 6150 for (k=0;k<constraints_n[i];k++) { 6151 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6152 } 6153 pcbddc->local_primal_size_cc += constraints_n[i]; 6154 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6155 PetscBTSet(qr_needed_idx,i); 6156 qr_needed = PETSC_TRUE; 6157 } 6158 } else { 6159 pcbddc->local_primal_size_cc += 1; 6160 } 6161 } 6162 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6163 pcbddc->n_vertices = total_primal_vertices; 6164 /* permute indices in order to have a sorted set of vertices */ 6165 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6166 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6167 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6168 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6169 6170 /* nonzero structure of constraint matrix */ 6171 /* and get reference dof for local constraints */ 6172 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6173 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6174 6175 j = total_primal_vertices; 6176 total_counts = total_primal_vertices; 6177 cum = total_primal_vertices; 6178 for (i=n_vertices;i<total_counts_cc;i++) { 6179 if (!PetscBTLookup(change_basis,i)) { 6180 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6181 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6182 cum++; 6183 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6184 for (k=0;k<constraints_n[i];k++) { 6185 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6186 nnz[j+k] = size_of_constraint; 6187 } 6188 j += constraints_n[i]; 6189 } 6190 } 6191 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6192 ierr = PetscFree(nnz);CHKERRQ(ierr); 6193 6194 /* set values in constraint matrix */ 6195 for (i=0;i<total_primal_vertices;i++) { 6196 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6197 } 6198 total_counts = total_primal_vertices; 6199 for (i=n_vertices;i<total_counts_cc;i++) { 6200 if (!PetscBTLookup(change_basis,i)) { 6201 PetscInt *cols; 6202 6203 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6204 cols = constraints_idxs+constraints_idxs_ptr[i]; 6205 for (k=0;k<constraints_n[i];k++) { 6206 PetscInt row = total_counts+k; 6207 PetscScalar *vals; 6208 6209 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6210 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6211 } 6212 total_counts += constraints_n[i]; 6213 } 6214 } 6215 /* assembling */ 6216 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6217 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6218 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6219 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6220 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6221 6222 /* 6223 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6224 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6225 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6226 */ 6227 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6228 if (pcbddc->use_change_of_basis) { 6229 /* dual and primal dofs on a single cc */ 6230 PetscInt dual_dofs,primal_dofs; 6231 /* working stuff for GEQRF */ 6232 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6233 PetscBLASInt lqr_work; 6234 /* working stuff for UNGQR */ 6235 PetscScalar *gqr_work,lgqr_work_t; 6236 PetscBLASInt lgqr_work; 6237 /* working stuff for TRTRS */ 6238 PetscScalar *trs_rhs; 6239 PetscBLASInt Blas_NRHS; 6240 /* pointers for values insertion into change of basis matrix */ 6241 PetscInt *start_rows,*start_cols; 6242 PetscScalar *start_vals; 6243 /* working stuff for values insertion */ 6244 PetscBT is_primal; 6245 PetscInt *aux_primal_numbering_B; 6246 /* matrix sizes */ 6247 PetscInt global_size,local_size; 6248 /* temporary change of basis */ 6249 Mat localChangeOfBasisMatrix; 6250 /* extra space for debugging */ 6251 PetscScalar *dbg_work; 6252 6253 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6254 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6255 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6256 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6257 /* nonzeros for local mat */ 6258 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6259 if (!pcbddc->benign_change || pcbddc->fake_change) { 6260 for (i=0;i<pcis->n;i++) nnz[i]=1; 6261 } else { 6262 const PetscInt *ii; 6263 PetscInt n; 6264 PetscBool flg_row; 6265 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6266 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6267 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6268 } 6269 for (i=n_vertices;i<total_counts_cc;i++) { 6270 if (PetscBTLookup(change_basis,i)) { 6271 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6272 if (PetscBTLookup(qr_needed_idx,i)) { 6273 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6274 } else { 6275 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6276 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6277 } 6278 } 6279 } 6280 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6281 ierr = PetscFree(nnz);CHKERRQ(ierr); 6282 /* Set interior change in the matrix */ 6283 if (!pcbddc->benign_change || pcbddc->fake_change) { 6284 for (i=0;i<pcis->n;i++) { 6285 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6286 } 6287 } else { 6288 const PetscInt *ii,*jj; 6289 PetscScalar *aa; 6290 PetscInt n; 6291 PetscBool flg_row; 6292 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6293 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6294 for (i=0;i<n;i++) { 6295 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6296 } 6297 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6298 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6299 } 6300 6301 if (pcbddc->dbg_flag) { 6302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6303 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6304 } 6305 6306 6307 /* Now we loop on the constraints which need a change of basis */ 6308 /* 6309 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6310 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6311 6312 Basic blocks of change of basis matrix T computed by 6313 6314 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6315 6316 | 1 0 ... 0 s_1/S | 6317 | 0 1 ... 0 s_2/S | 6318 | ... | 6319 | 0 ... 1 s_{n-1}/S | 6320 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6321 6322 with S = \sum_{i=1}^n s_i^2 6323 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6324 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6325 6326 - QR decomposition of constraints otherwise 6327 */ 6328 if (qr_needed) { 6329 /* space to store Q */ 6330 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6331 /* array to store scaling factors for reflectors */ 6332 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6333 /* first we issue queries for optimal work */ 6334 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6335 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6336 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6337 lqr_work = -1; 6338 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6339 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6340 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6341 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6342 lgqr_work = -1; 6343 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6344 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6345 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6346 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6347 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6348 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6349 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6350 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6351 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6352 /* array to store rhs and solution of triangular solver */ 6353 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6354 /* allocating workspace for check */ 6355 if (pcbddc->dbg_flag) { 6356 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6357 } 6358 } 6359 /* array to store whether a node is primal or not */ 6360 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6361 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6362 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6363 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6364 for (i=0;i<total_primal_vertices;i++) { 6365 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6366 } 6367 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6368 6369 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6370 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6371 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6372 if (PetscBTLookup(change_basis,total_counts)) { 6373 /* get constraint info */ 6374 primal_dofs = constraints_n[total_counts]; 6375 dual_dofs = size_of_constraint-primal_dofs; 6376 6377 if (pcbddc->dbg_flag) { 6378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6379 } 6380 6381 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6382 6383 /* copy quadrature constraints for change of basis check */ 6384 if (pcbddc->dbg_flag) { 6385 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6386 } 6387 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6388 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6389 6390 /* compute QR decomposition of constraints */ 6391 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6392 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6393 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6394 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6395 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6396 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6397 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6398 6399 /* explictly compute R^-T */ 6400 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6401 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6402 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6403 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6404 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6405 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6406 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6407 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6408 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6409 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6410 6411 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6412 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6413 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6414 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6415 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6416 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6417 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6418 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6419 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6420 6421 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6422 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6423 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6424 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6425 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6426 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6427 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6428 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6430 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6431 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6432 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6433 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6434 6435 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6436 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6437 /* insert cols for primal dofs */ 6438 for (j=0;j<primal_dofs;j++) { 6439 start_vals = &qr_basis[j*size_of_constraint]; 6440 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6441 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6442 } 6443 /* insert cols for dual dofs */ 6444 for (j=0,k=0;j<dual_dofs;k++) { 6445 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6446 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6447 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6448 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6449 j++; 6450 } 6451 } 6452 6453 /* check change of basis */ 6454 if (pcbddc->dbg_flag) { 6455 PetscInt ii,jj; 6456 PetscBool valid_qr=PETSC_TRUE; 6457 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6458 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6459 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6460 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6461 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6462 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6463 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6464 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6465 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6466 for (jj=0;jj<size_of_constraint;jj++) { 6467 for (ii=0;ii<primal_dofs;ii++) { 6468 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6469 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6470 } 6471 } 6472 if (!valid_qr) { 6473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6474 for (jj=0;jj<size_of_constraint;jj++) { 6475 for (ii=0;ii<primal_dofs;ii++) { 6476 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6477 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6478 } 6479 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6480 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6481 } 6482 } 6483 } 6484 } else { 6485 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6486 } 6487 } 6488 } else { /* simple transformation block */ 6489 PetscInt row,col; 6490 PetscScalar val,norm; 6491 6492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6493 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6494 for (j=0;j<size_of_constraint;j++) { 6495 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6496 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6497 if (!PetscBTLookup(is_primal,row_B)) { 6498 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6499 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6500 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6501 } else { 6502 for (k=0;k<size_of_constraint;k++) { 6503 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6504 if (row != col) { 6505 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6506 } else { 6507 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6508 } 6509 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6510 } 6511 } 6512 } 6513 if (pcbddc->dbg_flag) { 6514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6515 } 6516 } 6517 } else { 6518 if (pcbddc->dbg_flag) { 6519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6520 } 6521 } 6522 } 6523 6524 /* free workspace */ 6525 if (qr_needed) { 6526 if (pcbddc->dbg_flag) { 6527 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6528 } 6529 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6530 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6531 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6532 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6533 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6534 } 6535 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6536 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6537 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6538 6539 /* assembling of global change of variable */ 6540 if (!pcbddc->fake_change) { 6541 Mat tmat; 6542 PetscInt bs; 6543 6544 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6545 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6546 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6547 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6548 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6549 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6550 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6551 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6552 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6553 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6554 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6555 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6556 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6557 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6558 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6559 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6560 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6561 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6562 6563 /* check */ 6564 if (pcbddc->dbg_flag) { 6565 PetscReal error; 6566 Vec x,x_change; 6567 6568 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6569 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6570 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6571 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6572 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6573 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6574 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6575 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6576 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6577 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6578 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6579 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6580 if (error > PETSC_SMALL) { 6581 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6582 } 6583 ierr = VecDestroy(&x);CHKERRQ(ierr); 6584 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6585 } 6586 /* adapt sub_schurs computed (if any) */ 6587 if (pcbddc->use_deluxe_scaling) { 6588 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6589 6590 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6591 if (sub_schurs && sub_schurs->S_Ej_all) { 6592 Mat S_new,tmat; 6593 IS is_all_N,is_V_Sall = NULL; 6594 6595 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6596 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6597 if (pcbddc->deluxe_zerorows) { 6598 ISLocalToGlobalMapping NtoSall; 6599 IS is_V; 6600 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6601 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6602 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6603 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6604 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6605 } 6606 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6607 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6608 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6609 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6610 if (pcbddc->deluxe_zerorows) { 6611 const PetscScalar *array; 6612 const PetscInt *idxs_V,*idxs_all; 6613 PetscInt i,n_V; 6614 6615 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6616 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6617 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6618 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6619 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6620 for (i=0;i<n_V;i++) { 6621 PetscScalar val; 6622 PetscInt idx; 6623 6624 idx = idxs_V[i]; 6625 val = array[idxs_all[idxs_V[i]]]; 6626 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6627 } 6628 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6629 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6630 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6631 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6632 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6633 } 6634 sub_schurs->S_Ej_all = S_new; 6635 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6636 if (sub_schurs->sum_S_Ej_all) { 6637 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6638 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6639 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6640 if (pcbddc->deluxe_zerorows) { 6641 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6642 } 6643 sub_schurs->sum_S_Ej_all = S_new; 6644 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6645 } 6646 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6647 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6648 } 6649 /* destroy any change of basis context in sub_schurs */ 6650 if (sub_schurs && sub_schurs->change) { 6651 PetscInt i; 6652 6653 for (i=0;i<sub_schurs->n_subs;i++) { 6654 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6655 } 6656 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6657 } 6658 } 6659 if (pcbddc->switch_static) { /* need to save the local change */ 6660 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6661 } else { 6662 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6663 } 6664 /* determine if any process has changed the pressures locally */ 6665 pcbddc->change_interior = pcbddc->benign_have_null; 6666 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6667 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6668 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6669 pcbddc->use_qr_single = qr_needed; 6670 } 6671 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6672 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6673 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6674 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6675 } else { 6676 Mat benign_global = NULL; 6677 if (pcbddc->benign_have_null) { 6678 Mat tmat; 6679 6680 pcbddc->change_interior = PETSC_TRUE; 6681 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6682 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6683 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6684 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6685 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6686 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6687 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6688 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6689 if (pcbddc->benign_change) { 6690 Mat M; 6691 6692 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6693 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6694 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6695 ierr = MatDestroy(&M);CHKERRQ(ierr); 6696 } else { 6697 Mat eye; 6698 PetscScalar *array; 6699 6700 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6701 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6702 for (i=0;i<pcis->n;i++) { 6703 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6704 } 6705 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6706 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6707 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6708 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6709 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6710 } 6711 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6712 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6713 } 6714 if (pcbddc->user_ChangeOfBasisMatrix) { 6715 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6716 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6717 } else if (pcbddc->benign_have_null) { 6718 pcbddc->ChangeOfBasisMatrix = benign_global; 6719 } 6720 } 6721 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6722 IS is_global; 6723 const PetscInt *gidxs; 6724 6725 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6726 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6727 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6728 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6729 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6730 } 6731 } 6732 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6733 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6734 } 6735 6736 if (!pcbddc->fake_change) { 6737 /* add pressure dofs to set of primal nodes for numbering purposes */ 6738 for (i=0;i<pcbddc->benign_n;i++) { 6739 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6740 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6741 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6742 pcbddc->local_primal_size_cc++; 6743 pcbddc->local_primal_size++; 6744 } 6745 6746 /* check if a new primal space has been introduced (also take into account benign trick) */ 6747 pcbddc->new_primal_space_local = PETSC_TRUE; 6748 if (olocal_primal_size == pcbddc->local_primal_size) { 6749 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6750 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6751 if (!pcbddc->new_primal_space_local) { 6752 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6753 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6754 } 6755 } 6756 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6757 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6758 } 6759 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6760 6761 /* flush dbg viewer */ 6762 if (pcbddc->dbg_flag) { 6763 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6764 } 6765 6766 /* free workspace */ 6767 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6768 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6769 if (!pcbddc->adaptive_selection) { 6770 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6771 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6772 } else { 6773 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6774 pcbddc->adaptive_constraints_idxs_ptr, 6775 pcbddc->adaptive_constraints_data_ptr, 6776 pcbddc->adaptive_constraints_idxs, 6777 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6778 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6779 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6780 } 6781 PetscFunctionReturn(0); 6782 } 6783 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6784 6785 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6786 { 6787 ISLocalToGlobalMapping map; 6788 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6789 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6790 PetscInt i,N; 6791 PetscBool rcsr = PETSC_FALSE; 6792 PetscErrorCode ierr; 6793 6794 PetscFunctionBegin; 6795 if (pcbddc->recompute_topography) { 6796 pcbddc->graphanalyzed = PETSC_FALSE; 6797 /* Reset previously computed graph */ 6798 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6799 /* Init local Graph struct */ 6800 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6801 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6802 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6803 6804 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6805 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6806 } 6807 /* Check validity of the csr graph passed in by the user */ 6808 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6809 6810 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6811 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6812 PetscInt *xadj,*adjncy; 6813 PetscInt nvtxs; 6814 PetscBool flg_row=PETSC_FALSE; 6815 6816 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6817 if (flg_row) { 6818 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6819 pcbddc->computed_rowadj = PETSC_TRUE; 6820 } 6821 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6822 rcsr = PETSC_TRUE; 6823 } 6824 if (pcbddc->dbg_flag) { 6825 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6826 } 6827 6828 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6829 PetscReal *lcoords; 6830 PetscInt n; 6831 MPI_Datatype dimrealtype; 6832 6833 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6834 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6835 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6836 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6837 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6838 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6839 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6840 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6841 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6842 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6843 6844 pcbddc->mat_graph->coords = lcoords; 6845 pcbddc->mat_graph->cloc = PETSC_TRUE; 6846 pcbddc->mat_graph->cnloc = n; 6847 } 6848 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6849 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6850 6851 /* Setup of Graph */ 6852 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6853 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6854 6855 /* attach info on disconnected subdomains if present */ 6856 if (pcbddc->n_local_subs) { 6857 PetscInt *local_subs; 6858 6859 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6860 for (i=0;i<pcbddc->n_local_subs;i++) { 6861 const PetscInt *idxs; 6862 PetscInt nl,j; 6863 6864 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6865 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6866 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6867 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6868 } 6869 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6870 pcbddc->mat_graph->local_subs = local_subs; 6871 } 6872 } 6873 6874 if (!pcbddc->graphanalyzed) { 6875 /* Graph's connected components analysis */ 6876 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6877 pcbddc->graphanalyzed = PETSC_TRUE; 6878 } 6879 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6880 PetscFunctionReturn(0); 6881 } 6882 6883 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6884 { 6885 PetscInt i,j; 6886 PetscScalar *alphas; 6887 PetscErrorCode ierr; 6888 6889 PetscFunctionBegin; 6890 if (!n) PetscFunctionReturn(0); 6891 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6892 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6893 for (i=1;i<n;i++) { 6894 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6895 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6896 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6897 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6898 } 6899 ierr = PetscFree(alphas);CHKERRQ(ierr); 6900 PetscFunctionReturn(0); 6901 } 6902 6903 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6904 { 6905 Mat A; 6906 PetscInt n_neighs,*neighs,*n_shared,**shared; 6907 PetscMPIInt size,rank,color; 6908 PetscInt *xadj,*adjncy; 6909 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6910 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6911 PetscInt void_procs,*procs_candidates = NULL; 6912 PetscInt xadj_count,*count; 6913 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6914 PetscSubcomm psubcomm; 6915 MPI_Comm subcomm; 6916 PetscErrorCode ierr; 6917 6918 PetscFunctionBegin; 6919 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6920 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6921 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6922 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6923 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6924 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6925 6926 if (have_void) *have_void = PETSC_FALSE; 6927 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6928 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6929 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6930 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6931 im_active = !!n; 6932 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6933 void_procs = size - active_procs; 6934 /* get ranks of of non-active processes in mat communicator */ 6935 if (void_procs) { 6936 PetscInt ncand; 6937 6938 if (have_void) *have_void = PETSC_TRUE; 6939 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6940 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6941 for (i=0,ncand=0;i<size;i++) { 6942 if (!procs_candidates[i]) { 6943 procs_candidates[ncand++] = i; 6944 } 6945 } 6946 /* force n_subdomains to be not greater that the number of non-active processes */ 6947 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6948 } 6949 6950 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6951 number of subdomains requested 1 -> send to master or first candidate in voids */ 6952 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6953 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6954 PetscInt issize,isidx,dest; 6955 if (*n_subdomains == 1) dest = 0; 6956 else dest = rank; 6957 if (im_active) { 6958 issize = 1; 6959 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6960 isidx = procs_candidates[dest]; 6961 } else { 6962 isidx = dest; 6963 } 6964 } else { 6965 issize = 0; 6966 isidx = -1; 6967 } 6968 if (*n_subdomains != 1) *n_subdomains = active_procs; 6969 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6970 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6971 PetscFunctionReturn(0); 6972 } 6973 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6974 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6975 threshold = PetscMax(threshold,2); 6976 6977 /* Get info on mapping */ 6978 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6979 6980 /* build local CSR graph of subdomains' connectivity */ 6981 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6982 xadj[0] = 0; 6983 xadj[1] = PetscMax(n_neighs-1,0); 6984 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6985 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6986 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6987 for (i=1;i<n_neighs;i++) 6988 for (j=0;j<n_shared[i];j++) 6989 count[shared[i][j]] += 1; 6990 6991 xadj_count = 0; 6992 for (i=1;i<n_neighs;i++) { 6993 for (j=0;j<n_shared[i];j++) { 6994 if (count[shared[i][j]] < threshold) { 6995 adjncy[xadj_count] = neighs[i]; 6996 adjncy_wgt[xadj_count] = n_shared[i]; 6997 xadj_count++; 6998 break; 6999 } 7000 } 7001 } 7002 xadj[1] = xadj_count; 7003 ierr = PetscFree(count);CHKERRQ(ierr); 7004 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7005 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7006 7007 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7008 7009 /* Restrict work on active processes only */ 7010 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7011 if (void_procs) { 7012 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7013 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7014 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7015 subcomm = PetscSubcommChild(psubcomm); 7016 } else { 7017 psubcomm = NULL; 7018 subcomm = PetscObjectComm((PetscObject)mat); 7019 } 7020 7021 v_wgt = NULL; 7022 if (!color) { 7023 ierr = PetscFree(xadj);CHKERRQ(ierr); 7024 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7025 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7026 } else { 7027 Mat subdomain_adj; 7028 IS new_ranks,new_ranks_contig; 7029 MatPartitioning partitioner; 7030 PetscInt rstart=0,rend=0; 7031 PetscInt *is_indices,*oldranks; 7032 PetscMPIInt size; 7033 PetscBool aggregate; 7034 7035 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7036 if (void_procs) { 7037 PetscInt prank = rank; 7038 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7039 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7040 for (i=0;i<xadj[1];i++) { 7041 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7042 } 7043 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7044 } else { 7045 oldranks = NULL; 7046 } 7047 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7048 if (aggregate) { /* TODO: all this part could be made more efficient */ 7049 PetscInt lrows,row,ncols,*cols; 7050 PetscMPIInt nrank; 7051 PetscScalar *vals; 7052 7053 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7054 lrows = 0; 7055 if (nrank<redprocs) { 7056 lrows = size/redprocs; 7057 if (nrank<size%redprocs) lrows++; 7058 } 7059 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7060 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7061 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7062 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7063 row = nrank; 7064 ncols = xadj[1]-xadj[0]; 7065 cols = adjncy; 7066 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7067 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7068 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7069 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7070 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7071 ierr = PetscFree(xadj);CHKERRQ(ierr); 7072 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7073 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7074 ierr = PetscFree(vals);CHKERRQ(ierr); 7075 if (use_vwgt) { 7076 Vec v; 7077 const PetscScalar *array; 7078 PetscInt nl; 7079 7080 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7081 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7082 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7083 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7084 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7085 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7086 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7087 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7088 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7089 ierr = VecDestroy(&v);CHKERRQ(ierr); 7090 } 7091 } else { 7092 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7093 if (use_vwgt) { 7094 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7095 v_wgt[0] = n; 7096 } 7097 } 7098 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7099 7100 /* Partition */ 7101 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7102 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7103 if (v_wgt) { 7104 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7105 } 7106 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7107 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7108 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7109 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7110 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7111 7112 /* renumber new_ranks to avoid "holes" in new set of processors */ 7113 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7114 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7115 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7116 if (!aggregate) { 7117 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7118 #if defined(PETSC_USE_DEBUG) 7119 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7120 #endif 7121 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7122 } else if (oldranks) { 7123 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7124 } else { 7125 ranks_send_to_idx[0] = is_indices[0]; 7126 } 7127 } else { 7128 PetscInt idx = 0; 7129 PetscMPIInt tag; 7130 MPI_Request *reqs; 7131 7132 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7133 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7134 for (i=rstart;i<rend;i++) { 7135 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7136 } 7137 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7138 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7139 ierr = PetscFree(reqs);CHKERRQ(ierr); 7140 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7141 #if defined(PETSC_USE_DEBUG) 7142 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7143 #endif 7144 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7145 } else if (oldranks) { 7146 ranks_send_to_idx[0] = oldranks[idx]; 7147 } else { 7148 ranks_send_to_idx[0] = idx; 7149 } 7150 } 7151 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7152 /* clean up */ 7153 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7154 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7155 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7156 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7157 } 7158 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7159 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7160 7161 /* assemble parallel IS for sends */ 7162 i = 1; 7163 if (!color) i=0; 7164 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7165 PetscFunctionReturn(0); 7166 } 7167 7168 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7169 7170 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7171 { 7172 Mat local_mat; 7173 IS is_sends_internal; 7174 PetscInt rows,cols,new_local_rows; 7175 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7176 PetscBool ismatis,isdense,newisdense,destroy_mat; 7177 ISLocalToGlobalMapping l2gmap; 7178 PetscInt* l2gmap_indices; 7179 const PetscInt* is_indices; 7180 MatType new_local_type; 7181 /* buffers */ 7182 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7183 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7184 PetscInt *recv_buffer_idxs_local; 7185 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7186 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7187 /* MPI */ 7188 MPI_Comm comm,comm_n; 7189 PetscSubcomm subcomm; 7190 PetscMPIInt n_sends,n_recvs,commsize; 7191 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7192 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7193 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7194 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7195 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7196 PetscErrorCode ierr; 7197 7198 PetscFunctionBegin; 7199 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7200 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7201 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7202 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7203 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7204 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7205 PetscValidLogicalCollectiveBool(mat,reuse,6); 7206 PetscValidLogicalCollectiveInt(mat,nis,8); 7207 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7208 if (nvecs) { 7209 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7210 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7211 } 7212 /* further checks */ 7213 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7214 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7215 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7216 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7217 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7218 if (reuse && *mat_n) { 7219 PetscInt mrows,mcols,mnrows,mncols; 7220 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7221 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7222 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7223 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7224 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7225 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7226 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7227 } 7228 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7229 PetscValidLogicalCollectiveInt(mat,bs,0); 7230 7231 /* prepare IS for sending if not provided */ 7232 if (!is_sends) { 7233 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7234 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7235 } else { 7236 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7237 is_sends_internal = is_sends; 7238 } 7239 7240 /* get comm */ 7241 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7242 7243 /* compute number of sends */ 7244 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7245 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7246 7247 /* compute number of receives */ 7248 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7249 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7250 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7251 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7252 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7253 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7254 ierr = PetscFree(iflags);CHKERRQ(ierr); 7255 7256 /* restrict comm if requested */ 7257 subcomm = 0; 7258 destroy_mat = PETSC_FALSE; 7259 if (restrict_comm) { 7260 PetscMPIInt color,subcommsize; 7261 7262 color = 0; 7263 if (restrict_full) { 7264 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7265 } else { 7266 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7267 } 7268 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7269 subcommsize = commsize - subcommsize; 7270 /* check if reuse has been requested */ 7271 if (reuse) { 7272 if (*mat_n) { 7273 PetscMPIInt subcommsize2; 7274 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7275 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7276 comm_n = PetscObjectComm((PetscObject)*mat_n); 7277 } else { 7278 comm_n = PETSC_COMM_SELF; 7279 } 7280 } else { /* MAT_INITIAL_MATRIX */ 7281 PetscMPIInt rank; 7282 7283 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7284 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7285 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7286 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7287 comm_n = PetscSubcommChild(subcomm); 7288 } 7289 /* flag to destroy *mat_n if not significative */ 7290 if (color) destroy_mat = PETSC_TRUE; 7291 } else { 7292 comm_n = comm; 7293 } 7294 7295 /* prepare send/receive buffers */ 7296 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7297 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7298 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7299 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7300 if (nis) { 7301 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7302 } 7303 7304 /* Get data from local matrices */ 7305 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7306 /* TODO: See below some guidelines on how to prepare the local buffers */ 7307 /* 7308 send_buffer_vals should contain the raw values of the local matrix 7309 send_buffer_idxs should contain: 7310 - MatType_PRIVATE type 7311 - PetscInt size_of_l2gmap 7312 - PetscInt global_row_indices[size_of_l2gmap] 7313 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7314 */ 7315 else { 7316 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7317 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7318 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7319 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7320 send_buffer_idxs[1] = i; 7321 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7322 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7323 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7324 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7325 for (i=0;i<n_sends;i++) { 7326 ilengths_vals[is_indices[i]] = len*len; 7327 ilengths_idxs[is_indices[i]] = len+2; 7328 } 7329 } 7330 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7331 /* additional is (if any) */ 7332 if (nis) { 7333 PetscMPIInt psum; 7334 PetscInt j; 7335 for (j=0,psum=0;j<nis;j++) { 7336 PetscInt plen; 7337 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7338 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7339 psum += len+1; /* indices + lenght */ 7340 } 7341 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7342 for (j=0,psum=0;j<nis;j++) { 7343 PetscInt plen; 7344 const PetscInt *is_array_idxs; 7345 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7346 send_buffer_idxs_is[psum] = plen; 7347 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7348 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7349 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7350 psum += plen+1; /* indices + lenght */ 7351 } 7352 for (i=0;i<n_sends;i++) { 7353 ilengths_idxs_is[is_indices[i]] = psum; 7354 } 7355 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7356 } 7357 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7358 7359 buf_size_idxs = 0; 7360 buf_size_vals = 0; 7361 buf_size_idxs_is = 0; 7362 buf_size_vecs = 0; 7363 for (i=0;i<n_recvs;i++) { 7364 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7365 buf_size_vals += (PetscInt)olengths_vals[i]; 7366 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7367 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7368 } 7369 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7370 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7371 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7372 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7373 7374 /* get new tags for clean communications */ 7375 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7376 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7377 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7378 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7379 7380 /* allocate for requests */ 7381 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7382 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7383 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7384 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7385 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7386 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7387 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7388 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7389 7390 /* communications */ 7391 ptr_idxs = recv_buffer_idxs; 7392 ptr_vals = recv_buffer_vals; 7393 ptr_idxs_is = recv_buffer_idxs_is; 7394 ptr_vecs = recv_buffer_vecs; 7395 for (i=0;i<n_recvs;i++) { 7396 source_dest = onodes[i]; 7397 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7398 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7399 ptr_idxs += olengths_idxs[i]; 7400 ptr_vals += olengths_vals[i]; 7401 if (nis) { 7402 source_dest = onodes_is[i]; 7403 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7404 ptr_idxs_is += olengths_idxs_is[i]; 7405 } 7406 if (nvecs) { 7407 source_dest = onodes[i]; 7408 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7409 ptr_vecs += olengths_idxs[i]-2; 7410 } 7411 } 7412 for (i=0;i<n_sends;i++) { 7413 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7414 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7415 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7416 if (nis) { 7417 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7418 } 7419 if (nvecs) { 7420 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7421 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7422 } 7423 } 7424 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7425 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7426 7427 /* assemble new l2g map */ 7428 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7429 ptr_idxs = recv_buffer_idxs; 7430 new_local_rows = 0; 7431 for (i=0;i<n_recvs;i++) { 7432 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7433 ptr_idxs += olengths_idxs[i]; 7434 } 7435 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7436 ptr_idxs = recv_buffer_idxs; 7437 new_local_rows = 0; 7438 for (i=0;i<n_recvs;i++) { 7439 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7440 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7441 ptr_idxs += olengths_idxs[i]; 7442 } 7443 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7444 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7445 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7446 7447 /* infer new local matrix type from received local matrices type */ 7448 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7449 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7450 if (n_recvs) { 7451 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7452 ptr_idxs = recv_buffer_idxs; 7453 for (i=0;i<n_recvs;i++) { 7454 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7455 new_local_type_private = MATAIJ_PRIVATE; 7456 break; 7457 } 7458 ptr_idxs += olengths_idxs[i]; 7459 } 7460 switch (new_local_type_private) { 7461 case MATDENSE_PRIVATE: 7462 new_local_type = MATSEQAIJ; 7463 bs = 1; 7464 break; 7465 case MATAIJ_PRIVATE: 7466 new_local_type = MATSEQAIJ; 7467 bs = 1; 7468 break; 7469 case MATBAIJ_PRIVATE: 7470 new_local_type = MATSEQBAIJ; 7471 break; 7472 case MATSBAIJ_PRIVATE: 7473 new_local_type = MATSEQSBAIJ; 7474 break; 7475 default: 7476 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7477 break; 7478 } 7479 } else { /* by default, new_local_type is seqaij */ 7480 new_local_type = MATSEQAIJ; 7481 bs = 1; 7482 } 7483 7484 /* create MATIS object if needed */ 7485 if (!reuse) { 7486 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7487 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7488 } else { 7489 /* it also destroys the local matrices */ 7490 if (*mat_n) { 7491 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7492 } else { /* this is a fake object */ 7493 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7494 } 7495 } 7496 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7497 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7498 7499 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7500 7501 /* Global to local map of received indices */ 7502 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7503 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7504 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7505 7506 /* restore attributes -> type of incoming data and its size */ 7507 buf_size_idxs = 0; 7508 for (i=0;i<n_recvs;i++) { 7509 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7510 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7511 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7512 } 7513 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7514 7515 /* set preallocation */ 7516 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7517 if (!newisdense) { 7518 PetscInt *new_local_nnz=0; 7519 7520 ptr_idxs = recv_buffer_idxs_local; 7521 if (n_recvs) { 7522 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7523 } 7524 for (i=0;i<n_recvs;i++) { 7525 PetscInt j; 7526 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7527 for (j=0;j<*(ptr_idxs+1);j++) { 7528 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7529 } 7530 } else { 7531 /* TODO */ 7532 } 7533 ptr_idxs += olengths_idxs[i]; 7534 } 7535 if (new_local_nnz) { 7536 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7537 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7538 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7539 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7540 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7541 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7542 } else { 7543 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7544 } 7545 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7546 } else { 7547 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7548 } 7549 7550 /* set values */ 7551 ptr_vals = recv_buffer_vals; 7552 ptr_idxs = recv_buffer_idxs_local; 7553 for (i=0;i<n_recvs;i++) { 7554 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7555 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7556 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7557 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7558 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7559 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7560 } else { 7561 /* TODO */ 7562 } 7563 ptr_idxs += olengths_idxs[i]; 7564 ptr_vals += olengths_vals[i]; 7565 } 7566 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7567 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7568 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7569 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7570 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7571 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7572 7573 #if 0 7574 if (!restrict_comm) { /* check */ 7575 Vec lvec,rvec; 7576 PetscReal infty_error; 7577 7578 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7579 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7580 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7581 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7582 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7583 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7584 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7585 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7586 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7587 } 7588 #endif 7589 7590 /* assemble new additional is (if any) */ 7591 if (nis) { 7592 PetscInt **temp_idxs,*count_is,j,psum; 7593 7594 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7595 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7596 ptr_idxs = recv_buffer_idxs_is; 7597 psum = 0; 7598 for (i=0;i<n_recvs;i++) { 7599 for (j=0;j<nis;j++) { 7600 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7601 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7602 psum += plen; 7603 ptr_idxs += plen+1; /* shift pointer to received data */ 7604 } 7605 } 7606 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7607 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7608 for (i=1;i<nis;i++) { 7609 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7610 } 7611 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7612 ptr_idxs = recv_buffer_idxs_is; 7613 for (i=0;i<n_recvs;i++) { 7614 for (j=0;j<nis;j++) { 7615 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7616 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7617 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7618 ptr_idxs += plen+1; /* shift pointer to received data */ 7619 } 7620 } 7621 for (i=0;i<nis;i++) { 7622 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7623 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7624 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7625 } 7626 ierr = PetscFree(count_is);CHKERRQ(ierr); 7627 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7628 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7629 } 7630 /* free workspace */ 7631 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7632 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7633 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7634 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7635 if (isdense) { 7636 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7637 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7638 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7639 } else { 7640 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7641 } 7642 if (nis) { 7643 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7644 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7645 } 7646 7647 if (nvecs) { 7648 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7649 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7650 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7651 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7652 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7653 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7654 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7655 /* set values */ 7656 ptr_vals = recv_buffer_vecs; 7657 ptr_idxs = recv_buffer_idxs_local; 7658 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7659 for (i=0;i<n_recvs;i++) { 7660 PetscInt j; 7661 for (j=0;j<*(ptr_idxs+1);j++) { 7662 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7663 } 7664 ptr_idxs += olengths_idxs[i]; 7665 ptr_vals += olengths_idxs[i]-2; 7666 } 7667 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7668 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7669 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7670 } 7671 7672 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7673 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7674 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7675 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7676 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7677 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7678 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7679 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7680 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7681 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7682 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7683 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7684 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7685 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7686 ierr = PetscFree(onodes);CHKERRQ(ierr); 7687 if (nis) { 7688 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7689 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7690 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7691 } 7692 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7693 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7694 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7695 for (i=0;i<nis;i++) { 7696 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7697 } 7698 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7699 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7700 } 7701 *mat_n = NULL; 7702 } 7703 PetscFunctionReturn(0); 7704 } 7705 7706 /* temporary hack into ksp private data structure */ 7707 #include <petsc/private/kspimpl.h> 7708 7709 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7710 { 7711 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7712 PC_IS *pcis = (PC_IS*)pc->data; 7713 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7714 Mat coarsedivudotp = NULL; 7715 Mat coarseG,t_coarse_mat_is; 7716 MatNullSpace CoarseNullSpace = NULL; 7717 ISLocalToGlobalMapping coarse_islg; 7718 IS coarse_is,*isarray; 7719 PetscInt i,im_active=-1,active_procs=-1; 7720 PetscInt nis,nisdofs,nisneu,nisvert; 7721 PC pc_temp; 7722 PCType coarse_pc_type; 7723 KSPType coarse_ksp_type; 7724 PetscBool multilevel_requested,multilevel_allowed; 7725 PetscBool coarse_reuse; 7726 PetscInt ncoarse,nedcfield; 7727 PetscBool compute_vecs = PETSC_FALSE; 7728 PetscScalar *array; 7729 MatReuse coarse_mat_reuse; 7730 PetscBool restr, full_restr, have_void; 7731 PetscMPIInt commsize; 7732 PetscErrorCode ierr; 7733 7734 PetscFunctionBegin; 7735 /* Assign global numbering to coarse dofs */ 7736 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7737 PetscInt ocoarse_size; 7738 compute_vecs = PETSC_TRUE; 7739 7740 pcbddc->new_primal_space = PETSC_TRUE; 7741 ocoarse_size = pcbddc->coarse_size; 7742 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7743 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7744 /* see if we can avoid some work */ 7745 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7746 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7747 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7748 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7749 coarse_reuse = PETSC_FALSE; 7750 } else { /* we can safely reuse already computed coarse matrix */ 7751 coarse_reuse = PETSC_TRUE; 7752 } 7753 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7754 coarse_reuse = PETSC_FALSE; 7755 } 7756 /* reset any subassembling information */ 7757 if (!coarse_reuse || pcbddc->recompute_topography) { 7758 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7759 } 7760 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7761 coarse_reuse = PETSC_TRUE; 7762 } 7763 /* assemble coarse matrix */ 7764 if (coarse_reuse && pcbddc->coarse_ksp) { 7765 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7766 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7767 coarse_mat_reuse = MAT_REUSE_MATRIX; 7768 } else { 7769 coarse_mat = NULL; 7770 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7771 } 7772 7773 /* creates temporary l2gmap and IS for coarse indexes */ 7774 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7775 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7776 7777 /* creates temporary MATIS object for coarse matrix */ 7778 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7779 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7780 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7781 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7782 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7783 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7784 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7785 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7786 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7787 7788 /* count "active" (i.e. with positive local size) and "void" processes */ 7789 im_active = !!(pcis->n); 7790 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7791 7792 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7793 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7794 /* full_restr : just use the receivers from the subassembling pattern */ 7795 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7796 coarse_mat_is = NULL; 7797 multilevel_allowed = PETSC_FALSE; 7798 multilevel_requested = PETSC_FALSE; 7799 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7800 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7801 if (multilevel_requested) { 7802 ncoarse = active_procs/pcbddc->coarsening_ratio; 7803 restr = PETSC_FALSE; 7804 full_restr = PETSC_FALSE; 7805 } else { 7806 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7807 restr = PETSC_TRUE; 7808 full_restr = PETSC_TRUE; 7809 } 7810 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7811 ncoarse = PetscMax(1,ncoarse); 7812 if (!pcbddc->coarse_subassembling) { 7813 if (pcbddc->coarsening_ratio > 1) { 7814 if (multilevel_requested) { 7815 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7816 } else { 7817 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7818 } 7819 } else { 7820 PetscMPIInt rank; 7821 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7822 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7823 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7824 } 7825 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7826 PetscInt psum; 7827 if (pcbddc->coarse_ksp) psum = 1; 7828 else psum = 0; 7829 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7830 if (ncoarse < commsize) have_void = PETSC_TRUE; 7831 } 7832 /* determine if we can go multilevel */ 7833 if (multilevel_requested) { 7834 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7835 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7836 } 7837 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7838 7839 /* dump subassembling pattern */ 7840 if (pcbddc->dbg_flag && multilevel_allowed) { 7841 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7842 } 7843 7844 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7845 nedcfield = -1; 7846 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7847 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7848 const PetscInt *idxs; 7849 ISLocalToGlobalMapping tmap; 7850 7851 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7852 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7853 /* allocate space for temporary storage */ 7854 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7855 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7856 /* allocate for IS array */ 7857 nisdofs = pcbddc->n_ISForDofsLocal; 7858 if (pcbddc->nedclocal) { 7859 if (pcbddc->nedfield > -1) { 7860 nedcfield = pcbddc->nedfield; 7861 } else { 7862 nedcfield = 0; 7863 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7864 nisdofs = 1; 7865 } 7866 } 7867 nisneu = !!pcbddc->NeumannBoundariesLocal; 7868 nisvert = 0; /* nisvert is not used */ 7869 nis = nisdofs + nisneu + nisvert; 7870 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7871 /* dofs splitting */ 7872 for (i=0;i<nisdofs;i++) { 7873 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7874 if (nedcfield != i) { 7875 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7876 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7877 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7878 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7879 } else { 7880 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7881 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7882 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7883 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7884 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7885 } 7886 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7887 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7888 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7889 } 7890 /* neumann boundaries */ 7891 if (pcbddc->NeumannBoundariesLocal) { 7892 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7893 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7894 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7895 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7896 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7897 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7898 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7899 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7900 } 7901 /* free memory */ 7902 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7903 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7904 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7905 } else { 7906 nis = 0; 7907 nisdofs = 0; 7908 nisneu = 0; 7909 nisvert = 0; 7910 isarray = NULL; 7911 } 7912 /* destroy no longer needed map */ 7913 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7914 7915 /* subassemble */ 7916 if (multilevel_allowed) { 7917 Vec vp[1]; 7918 PetscInt nvecs = 0; 7919 PetscBool reuse,reuser; 7920 7921 if (coarse_mat) reuse = PETSC_TRUE; 7922 else reuse = PETSC_FALSE; 7923 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7924 vp[0] = NULL; 7925 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7926 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7927 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7928 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7929 nvecs = 1; 7930 7931 if (pcbddc->divudotp) { 7932 Mat B,loc_divudotp; 7933 Vec v,p; 7934 IS dummy; 7935 PetscInt np; 7936 7937 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7938 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7939 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7940 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7941 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7942 ierr = VecSet(p,1.);CHKERRQ(ierr); 7943 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7944 ierr = VecDestroy(&p);CHKERRQ(ierr); 7945 ierr = MatDestroy(&B);CHKERRQ(ierr); 7946 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7947 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7948 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7949 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7950 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7951 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7952 ierr = VecDestroy(&v);CHKERRQ(ierr); 7953 } 7954 } 7955 if (reuser) { 7956 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7957 } else { 7958 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7959 } 7960 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7961 PetscScalar *arraym,*arrayv; 7962 PetscInt nl; 7963 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7964 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7965 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7966 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7967 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7968 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7969 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7970 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7971 } else { 7972 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7973 } 7974 } else { 7975 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7976 } 7977 if (coarse_mat_is || coarse_mat) { 7978 PetscMPIInt size; 7979 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7980 if (!multilevel_allowed) { 7981 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7982 } else { 7983 Mat A; 7984 7985 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7986 if (coarse_mat_is) { 7987 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7988 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7989 coarse_mat = coarse_mat_is; 7990 } 7991 /* be sure we don't have MatSeqDENSE as local mat */ 7992 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7993 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7994 } 7995 } 7996 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7997 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7998 7999 /* create local to global scatters for coarse problem */ 8000 if (compute_vecs) { 8001 PetscInt lrows; 8002 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8003 if (coarse_mat) { 8004 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8005 } else { 8006 lrows = 0; 8007 } 8008 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8009 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8010 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8011 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8012 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8013 } 8014 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8015 8016 /* set defaults for coarse KSP and PC */ 8017 if (multilevel_allowed) { 8018 coarse_ksp_type = KSPRICHARDSON; 8019 coarse_pc_type = PCBDDC; 8020 } else { 8021 coarse_ksp_type = KSPPREONLY; 8022 coarse_pc_type = PCREDUNDANT; 8023 } 8024 8025 /* print some info if requested */ 8026 if (pcbddc->dbg_flag) { 8027 if (!multilevel_allowed) { 8028 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8029 if (multilevel_requested) { 8030 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8031 } else if (pcbddc->max_levels) { 8032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8033 } 8034 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8035 } 8036 } 8037 8038 /* communicate coarse discrete gradient */ 8039 coarseG = NULL; 8040 if (pcbddc->nedcG && multilevel_allowed) { 8041 MPI_Comm ccomm; 8042 if (coarse_mat) { 8043 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8044 } else { 8045 ccomm = MPI_COMM_NULL; 8046 } 8047 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8048 } 8049 8050 /* create the coarse KSP object only once with defaults */ 8051 if (coarse_mat) { 8052 PetscBool isredundant,isnn,isbddc; 8053 PetscViewer dbg_viewer = NULL; 8054 8055 if (pcbddc->dbg_flag) { 8056 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8057 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8058 } 8059 if (!pcbddc->coarse_ksp) { 8060 char prefix[256],str_level[16]; 8061 size_t len; 8062 8063 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8064 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8065 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8066 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8067 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8068 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8069 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8070 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8071 /* TODO is this logic correct? should check for coarse_mat type */ 8072 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8073 /* prefix */ 8074 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8075 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8076 if (!pcbddc->current_level) { 8077 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 8078 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 8079 } else { 8080 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8081 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8082 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8083 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8084 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8085 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 8086 } 8087 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8088 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8089 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8090 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8091 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8092 /* allow user customization */ 8093 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8094 } 8095 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8096 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8097 if (nisdofs) { 8098 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8099 for (i=0;i<nisdofs;i++) { 8100 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8101 } 8102 } 8103 if (nisneu) { 8104 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8105 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8106 } 8107 if (nisvert) { 8108 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8109 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8110 } 8111 if (coarseG) { 8112 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8113 } 8114 8115 /* get some info after set from options */ 8116 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8117 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8118 if (isbddc && !multilevel_allowed) { 8119 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8120 isbddc = PETSC_FALSE; 8121 } 8122 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8123 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8124 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8125 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8126 isbddc = PETSC_TRUE; 8127 } 8128 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8129 if (isredundant) { 8130 KSP inner_ksp; 8131 PC inner_pc; 8132 8133 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8134 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8135 } 8136 8137 /* parameters which miss an API */ 8138 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8139 if (isbddc) { 8140 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8141 8142 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8143 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8144 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8145 if (pcbddc_coarse->benign_saddle_point) { 8146 Mat coarsedivudotp_is; 8147 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8148 IS row,col; 8149 const PetscInt *gidxs; 8150 PetscInt n,st,M,N; 8151 8152 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8153 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8154 st = st-n; 8155 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8156 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8157 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8158 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8159 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8160 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8161 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8162 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8163 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8164 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8165 ierr = ISDestroy(&row);CHKERRQ(ierr); 8166 ierr = ISDestroy(&col);CHKERRQ(ierr); 8167 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8168 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8169 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8170 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8171 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8172 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8173 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8174 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8175 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8176 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8177 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8178 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8179 } 8180 } 8181 8182 /* propagate symmetry info of coarse matrix */ 8183 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8184 if (pc->pmat->symmetric_set) { 8185 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8186 } 8187 if (pc->pmat->hermitian_set) { 8188 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8189 } 8190 if (pc->pmat->spd_set) { 8191 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8192 } 8193 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8194 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8195 } 8196 /* set operators */ 8197 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8198 if (pcbddc->dbg_flag) { 8199 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8200 } 8201 } 8202 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8203 ierr = PetscFree(isarray);CHKERRQ(ierr); 8204 #if 0 8205 { 8206 PetscViewer viewer; 8207 char filename[256]; 8208 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8209 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8210 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8211 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8212 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8213 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8214 } 8215 #endif 8216 8217 if (pcbddc->coarse_ksp) { 8218 Vec crhs,csol; 8219 8220 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8221 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8222 if (!csol) { 8223 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8224 } 8225 if (!crhs) { 8226 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8227 } 8228 } 8229 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8230 8231 /* compute null space for coarse solver if the benign trick has been requested */ 8232 if (pcbddc->benign_null) { 8233 8234 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8235 for (i=0;i<pcbddc->benign_n;i++) { 8236 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8237 } 8238 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8239 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8240 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8241 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8242 if (coarse_mat) { 8243 Vec nullv; 8244 PetscScalar *array,*array2; 8245 PetscInt nl; 8246 8247 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8248 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8249 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8250 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8251 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8252 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8253 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8254 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8255 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8256 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8257 } 8258 } 8259 8260 if (pcbddc->coarse_ksp) { 8261 PetscBool ispreonly; 8262 8263 if (CoarseNullSpace) { 8264 PetscBool isnull; 8265 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8266 if (isnull) { 8267 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8268 } 8269 /* TODO: add local nullspaces (if any) */ 8270 } 8271 /* setup coarse ksp */ 8272 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8273 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8274 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8275 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8276 KSP check_ksp; 8277 KSPType check_ksp_type; 8278 PC check_pc; 8279 Vec check_vec,coarse_vec; 8280 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8281 PetscInt its; 8282 PetscBool compute_eigs; 8283 PetscReal *eigs_r,*eigs_c; 8284 PetscInt neigs; 8285 const char *prefix; 8286 8287 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8288 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8289 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8290 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8291 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8292 /* prevent from setup unneeded object */ 8293 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8294 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8295 if (ispreonly) { 8296 check_ksp_type = KSPPREONLY; 8297 compute_eigs = PETSC_FALSE; 8298 } else { 8299 check_ksp_type = KSPGMRES; 8300 compute_eigs = PETSC_TRUE; 8301 } 8302 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8303 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8304 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8305 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8306 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8307 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8308 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8309 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8310 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8311 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8312 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8313 /* create random vec */ 8314 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8315 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8316 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8317 /* solve coarse problem */ 8318 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8319 /* set eigenvalue estimation if preonly has not been requested */ 8320 if (compute_eigs) { 8321 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8322 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8323 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8324 if (neigs) { 8325 lambda_max = eigs_r[neigs-1]; 8326 lambda_min = eigs_r[0]; 8327 if (pcbddc->use_coarse_estimates) { 8328 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8329 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8330 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8331 } 8332 } 8333 } 8334 } 8335 8336 /* check coarse problem residual error */ 8337 if (pcbddc->dbg_flag) { 8338 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8339 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8340 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8341 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8342 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8343 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8344 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8345 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8346 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8347 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8348 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8349 if (CoarseNullSpace) { 8350 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8351 } 8352 if (compute_eigs) { 8353 PetscReal lambda_max_s,lambda_min_s; 8354 KSPConvergedReason reason; 8355 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8356 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8357 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8358 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8359 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8360 for (i=0;i<neigs;i++) { 8361 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8362 } 8363 } 8364 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8365 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8366 } 8367 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8368 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8369 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8370 if (compute_eigs) { 8371 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8372 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8373 } 8374 } 8375 } 8376 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8377 /* print additional info */ 8378 if (pcbddc->dbg_flag) { 8379 /* waits until all processes reaches this point */ 8380 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8381 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8382 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8383 } 8384 8385 /* free memory */ 8386 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8387 PetscFunctionReturn(0); 8388 } 8389 8390 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8391 { 8392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8393 PC_IS* pcis = (PC_IS*)pc->data; 8394 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8395 IS subset,subset_mult,subset_n; 8396 PetscInt local_size,coarse_size=0; 8397 PetscInt *local_primal_indices=NULL; 8398 const PetscInt *t_local_primal_indices; 8399 PetscErrorCode ierr; 8400 8401 PetscFunctionBegin; 8402 /* Compute global number of coarse dofs */ 8403 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8404 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8405 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8406 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8407 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8408 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8409 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8410 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8411 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8412 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8413 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8414 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8415 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8416 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8417 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8418 8419 /* check numbering */ 8420 if (pcbddc->dbg_flag) { 8421 PetscScalar coarsesum,*array,*array2; 8422 PetscInt i; 8423 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8424 8425 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8426 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8427 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8428 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8429 /* counter */ 8430 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8431 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8432 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8433 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8434 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8435 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8436 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8437 for (i=0;i<pcbddc->local_primal_size;i++) { 8438 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8439 } 8440 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8441 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8442 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8443 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8444 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8445 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8446 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8447 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8448 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8449 for (i=0;i<pcis->n;i++) { 8450 if (array[i] != 0.0 && array[i] != array2[i]) { 8451 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8452 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8453 set_error = PETSC_TRUE; 8454 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8455 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8456 } 8457 } 8458 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8459 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8460 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8461 for (i=0;i<pcis->n;i++) { 8462 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8463 } 8464 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8465 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8466 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8467 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8468 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8469 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8470 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8471 PetscInt *gidxs; 8472 8473 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8474 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8475 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8476 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8477 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8478 for (i=0;i<pcbddc->local_primal_size;i++) { 8479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8480 } 8481 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8482 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8483 } 8484 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8485 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8486 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8487 } 8488 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8489 /* get back data */ 8490 *coarse_size_n = coarse_size; 8491 *local_primal_indices_n = local_primal_indices; 8492 PetscFunctionReturn(0); 8493 } 8494 8495 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8496 { 8497 IS localis_t; 8498 PetscInt i,lsize,*idxs,n; 8499 PetscScalar *vals; 8500 PetscErrorCode ierr; 8501 8502 PetscFunctionBegin; 8503 /* get indices in local ordering exploiting local to global map */ 8504 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8505 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8506 for (i=0;i<lsize;i++) vals[i] = 1.0; 8507 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8508 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8509 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8510 if (idxs) { /* multilevel guard */ 8511 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8512 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8513 } 8514 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8515 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8516 ierr = PetscFree(vals);CHKERRQ(ierr); 8517 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8518 /* now compute set in local ordering */ 8519 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8520 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8521 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8522 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8523 for (i=0,lsize=0;i<n;i++) { 8524 if (PetscRealPart(vals[i]) > 0.5) { 8525 lsize++; 8526 } 8527 } 8528 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8529 for (i=0,lsize=0;i<n;i++) { 8530 if (PetscRealPart(vals[i]) > 0.5) { 8531 idxs[lsize++] = i; 8532 } 8533 } 8534 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8535 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8536 *localis = localis_t; 8537 PetscFunctionReturn(0); 8538 } 8539 8540 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8541 { 8542 PC_IS *pcis=(PC_IS*)pc->data; 8543 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8544 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8545 Mat S_j; 8546 PetscInt *used_xadj,*used_adjncy; 8547 PetscBool free_used_adj; 8548 PetscErrorCode ierr; 8549 8550 PetscFunctionBegin; 8551 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8552 free_used_adj = PETSC_FALSE; 8553 if (pcbddc->sub_schurs_layers == -1) { 8554 used_xadj = NULL; 8555 used_adjncy = NULL; 8556 } else { 8557 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8558 used_xadj = pcbddc->mat_graph->xadj; 8559 used_adjncy = pcbddc->mat_graph->adjncy; 8560 } else if (pcbddc->computed_rowadj) { 8561 used_xadj = pcbddc->mat_graph->xadj; 8562 used_adjncy = pcbddc->mat_graph->adjncy; 8563 } else { 8564 PetscBool flg_row=PETSC_FALSE; 8565 const PetscInt *xadj,*adjncy; 8566 PetscInt nvtxs; 8567 8568 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8569 if (flg_row) { 8570 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8571 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8572 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8573 free_used_adj = PETSC_TRUE; 8574 } else { 8575 pcbddc->sub_schurs_layers = -1; 8576 used_xadj = NULL; 8577 used_adjncy = NULL; 8578 } 8579 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8580 } 8581 } 8582 8583 /* setup sub_schurs data */ 8584 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8585 if (!sub_schurs->schur_explicit) { 8586 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8587 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8588 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8589 } else { 8590 Mat change = NULL; 8591 Vec scaling = NULL; 8592 IS change_primal = NULL, iP; 8593 PetscInt benign_n; 8594 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8595 PetscBool isseqaij,need_change = PETSC_FALSE; 8596 PetscBool discrete_harmonic = PETSC_FALSE; 8597 8598 if (!pcbddc->use_vertices && reuse_solvers) { 8599 PetscInt n_vertices; 8600 8601 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8602 reuse_solvers = (PetscBool)!n_vertices; 8603 } 8604 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8605 if (!isseqaij) { 8606 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8607 if (matis->A == pcbddc->local_mat) { 8608 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8609 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8610 } else { 8611 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8612 } 8613 } 8614 if (!pcbddc->benign_change_explicit) { 8615 benign_n = pcbddc->benign_n; 8616 } else { 8617 benign_n = 0; 8618 } 8619 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8620 We need a global reduction to avoid possible deadlocks. 8621 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8622 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8623 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8624 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8625 need_change = (PetscBool)(!need_change); 8626 } 8627 /* If the user defines additional constraints, we import them here. 8628 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8629 if (need_change) { 8630 PC_IS *pcisf; 8631 PC_BDDC *pcbddcf; 8632 PC pcf; 8633 8634 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8635 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8636 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8637 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8638 8639 /* hacks */ 8640 pcisf = (PC_IS*)pcf->data; 8641 pcisf->is_B_local = pcis->is_B_local; 8642 pcisf->vec1_N = pcis->vec1_N; 8643 pcisf->BtoNmap = pcis->BtoNmap; 8644 pcisf->n = pcis->n; 8645 pcisf->n_B = pcis->n_B; 8646 pcbddcf = (PC_BDDC*)pcf->data; 8647 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8648 pcbddcf->mat_graph = pcbddc->mat_graph; 8649 pcbddcf->use_faces = PETSC_TRUE; 8650 pcbddcf->use_change_of_basis = PETSC_TRUE; 8651 pcbddcf->use_change_on_faces = PETSC_TRUE; 8652 pcbddcf->use_qr_single = PETSC_TRUE; 8653 pcbddcf->fake_change = PETSC_TRUE; 8654 8655 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8656 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8657 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8658 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8659 change = pcbddcf->ConstraintMatrix; 8660 pcbddcf->ConstraintMatrix = NULL; 8661 8662 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8663 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8664 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8665 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8666 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8667 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8668 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8669 pcf->ops->destroy = NULL; 8670 pcf->ops->reset = NULL; 8671 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8672 } 8673 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8674 8675 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8676 if (iP) { 8677 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8678 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8679 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8680 } 8681 if (discrete_harmonic) { 8682 Mat A; 8683 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8684 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8685 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8686 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8687 ierr = MatDestroy(&A);CHKERRQ(ierr); 8688 } else { 8689 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8690 } 8691 ierr = MatDestroy(&change);CHKERRQ(ierr); 8692 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8693 } 8694 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8695 8696 /* free adjacency */ 8697 if (free_used_adj) { 8698 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8699 } 8700 PetscFunctionReturn(0); 8701 } 8702 8703 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8704 { 8705 PC_IS *pcis=(PC_IS*)pc->data; 8706 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8707 PCBDDCGraph graph; 8708 PetscErrorCode ierr; 8709 8710 PetscFunctionBegin; 8711 /* attach interface graph for determining subsets */ 8712 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8713 IS verticesIS,verticescomm; 8714 PetscInt vsize,*idxs; 8715 8716 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8717 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8718 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8719 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8720 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8721 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8722 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8723 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8724 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8725 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8726 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8727 } else { 8728 graph = pcbddc->mat_graph; 8729 } 8730 /* print some info */ 8731 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8732 IS vertices; 8733 PetscInt nv,nedges,nfaces; 8734 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8735 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8736 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8737 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8738 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8741 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8742 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8743 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8744 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8745 } 8746 8747 /* sub_schurs init */ 8748 if (!pcbddc->sub_schurs) { 8749 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8750 } 8751 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8752 8753 /* free graph struct */ 8754 if (pcbddc->sub_schurs_rebuild) { 8755 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8756 } 8757 PetscFunctionReturn(0); 8758 } 8759 8760 PetscErrorCode PCBDDCCheckOperator(PC pc) 8761 { 8762 PC_IS *pcis=(PC_IS*)pc->data; 8763 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8764 PetscErrorCode ierr; 8765 8766 PetscFunctionBegin; 8767 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8768 IS zerodiag = NULL; 8769 Mat S_j,B0_B=NULL; 8770 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8771 PetscScalar *p0_check,*array,*array2; 8772 PetscReal norm; 8773 PetscInt i; 8774 8775 /* B0 and B0_B */ 8776 if (zerodiag) { 8777 IS dummy; 8778 8779 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8780 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8781 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8782 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8783 } 8784 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8785 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8786 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8787 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8788 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8789 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8790 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8791 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8792 /* S_j */ 8793 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8794 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8795 8796 /* mimic vector in \widetilde{W}_\Gamma */ 8797 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8798 /* continuous in primal space */ 8799 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8800 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8801 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8802 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8803 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8804 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8805 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8806 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8807 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8808 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8809 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8810 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8811 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8812 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8813 8814 /* assemble rhs for coarse problem */ 8815 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8816 /* local with Schur */ 8817 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8818 if (zerodiag) { 8819 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8820 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8821 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8822 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8823 } 8824 /* sum on primal nodes the local contributions */ 8825 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8826 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8827 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8828 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8829 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8830 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8831 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8832 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8833 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8834 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8835 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8836 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8837 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8838 /* scale primal nodes (BDDC sums contibutions) */ 8839 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8840 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8841 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8842 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8843 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8844 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8845 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8846 /* global: \widetilde{B0}_B w_\Gamma */ 8847 if (zerodiag) { 8848 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8849 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8850 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8851 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8852 } 8853 /* BDDC */ 8854 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8855 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8856 8857 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8858 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8859 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8860 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8861 for (i=0;i<pcbddc->benign_n;i++) { 8862 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8863 } 8864 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8865 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8866 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8867 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8868 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8869 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8870 } 8871 PetscFunctionReturn(0); 8872 } 8873 8874 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8875 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8876 { 8877 Mat At; 8878 IS rows; 8879 PetscInt rst,ren; 8880 PetscErrorCode ierr; 8881 PetscLayout rmap; 8882 8883 PetscFunctionBegin; 8884 rst = ren = 0; 8885 if (ccomm != MPI_COMM_NULL) { 8886 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8887 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8888 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8889 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8890 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8891 } 8892 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8893 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8894 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8895 8896 if (ccomm != MPI_COMM_NULL) { 8897 Mat_MPIAIJ *a,*b; 8898 IS from,to; 8899 Vec gvec; 8900 PetscInt lsize; 8901 8902 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8903 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8904 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8905 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8906 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8907 a = (Mat_MPIAIJ*)At->data; 8908 b = (Mat_MPIAIJ*)(*B)->data; 8909 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8910 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8911 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8912 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8913 b->A = a->A; 8914 b->B = a->B; 8915 8916 b->donotstash = a->donotstash; 8917 b->roworiented = a->roworiented; 8918 b->rowindices = 0; 8919 b->rowvalues = 0; 8920 b->getrowactive = PETSC_FALSE; 8921 8922 (*B)->rmap = rmap; 8923 (*B)->factortype = A->factortype; 8924 (*B)->assembled = PETSC_TRUE; 8925 (*B)->insertmode = NOT_SET_VALUES; 8926 (*B)->preallocated = PETSC_TRUE; 8927 8928 if (a->colmap) { 8929 #if defined(PETSC_USE_CTABLE) 8930 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8931 #else 8932 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8933 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8934 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8935 #endif 8936 } else b->colmap = 0; 8937 if (a->garray) { 8938 PetscInt len; 8939 len = a->B->cmap->n; 8940 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8941 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8942 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8943 } else b->garray = 0; 8944 8945 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8946 b->lvec = a->lvec; 8947 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8948 8949 /* cannot use VecScatterCopy */ 8950 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8951 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8952 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8953 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8954 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8955 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8956 ierr = ISDestroy(&from);CHKERRQ(ierr); 8957 ierr = ISDestroy(&to);CHKERRQ(ierr); 8958 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8959 } 8960 ierr = MatDestroy(&At);CHKERRQ(ierr); 8961 PetscFunctionReturn(0); 8962 } 8963