1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdmplex.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 #include <petsc/private/dmpleximpl.h> 8 9 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 10 11 /* if range is true, it returns B s.t. span{B} = range(A) 12 if range is false, it returns B s.t. range(B) _|_ range(A) */ 13 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 14 { 15 #if !defined(PETSC_USE_COMPLEX) 16 PetscScalar *uwork,*data,*U, ds = 0.; 17 PetscReal *sing; 18 PetscBLASInt bM,bN,lwork,lierr,di = 1; 19 PetscInt ulw,i,nr,nc,n; 20 PetscErrorCode ierr; 21 22 PetscFunctionBegin; 23 #if defined(PETSC_MISSING_LAPACK_GESVD) 24 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 25 #else 26 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 27 if (!nr || !nc) PetscFunctionReturn(0); 28 29 /* workspace */ 30 if (!work) { 31 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 32 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 33 } else { 34 ulw = lw; 35 uwork = work; 36 } 37 n = PetscMin(nr,nc); 38 if (!rwork) { 39 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 40 } else { 41 sing = rwork; 42 } 43 44 /* SVD */ 45 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 46 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 49 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 50 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 51 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 52 ierr = PetscFPTrapPop();CHKERRQ(ierr); 53 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 54 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 55 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 56 if (!rwork) { 57 ierr = PetscFree(sing);CHKERRQ(ierr); 58 } 59 if (!work) { 60 ierr = PetscFree(uwork);CHKERRQ(ierr); 61 } 62 /* create B */ 63 if (!range) { 64 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 65 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 66 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 67 } else { 68 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 69 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 70 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 71 } 72 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 73 ierr = PetscFree(U);CHKERRQ(ierr); 74 #endif 75 #else /* PETSC_USE_COMPLEX */ 76 PetscFunctionBegin; 77 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 78 #endif 79 PetscFunctionReturn(0); 80 } 81 82 /* TODO REMOVE */ 83 #if defined(PRINT_GDET) 84 static int inc = 0; 85 static int lev = 0; 86 #endif 87 88 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 89 { 90 PetscErrorCode ierr; 91 Mat GE,GEd; 92 PetscInt rsize,csize,esize; 93 PetscScalar *ptr; 94 95 PetscFunctionBegin; 96 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 97 if (!esize) PetscFunctionReturn(0); 98 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 99 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 100 101 /* gradients */ 102 ptr = work + 5*esize; 103 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 105 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 106 ierr = MatDestroy(&GE);CHKERRQ(ierr); 107 108 /* constants */ 109 ptr += rsize*csize; 110 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 111 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 112 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 113 ierr = MatDestroy(&GE);CHKERRQ(ierr); 114 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 115 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 116 117 if (corners) { 118 Mat GEc; 119 PetscScalar *vals,v; 120 121 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 122 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 123 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 124 /* v = PetscAbsScalar(vals[0]) */; 125 v = 1.; 126 cvals[0] = vals[0]/v; 127 cvals[1] = vals[1]/v; 128 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 129 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 130 #if defined(PRINT_GDET) 131 { 132 PetscViewer viewer; 133 char filename[256]; 134 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 135 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 136 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 137 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 138 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 140 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 142 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 143 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 144 } 145 #endif 146 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 147 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 148 } 149 150 PetscFunctionReturn(0); 151 } 152 153 PetscErrorCode PCBDDCNedelecSupport(PC pc) 154 { 155 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 156 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 157 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 158 Vec tvec; 159 PetscSF sfv; 160 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 161 MPI_Comm comm; 162 IS lned,primals,allprimals,nedfieldlocal; 163 IS *eedges,*extrows,*extcols,*alleedges; 164 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 165 PetscScalar *vals,*work; 166 PetscReal *rwork; 167 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 168 PetscInt ne,nv,Lv,order,n,field; 169 PetscInt n_neigh,*neigh,*n_shared,**shared; 170 PetscInt i,j,extmem,cum,maxsize,nee; 171 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 172 PetscInt *sfvleaves,*sfvroots; 173 PetscInt *corners,*cedges; 174 PetscInt *ecount,**eneighs,*vcount,**vneighs; 175 #if defined(PETSC_USE_DEBUG) 176 PetscInt *emarks; 177 #endif 178 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 179 PetscErrorCode ierr; 180 181 PetscFunctionBegin; 182 /* If the discrete gradient is defined for a subset of dofs and global is true, 183 it assumes G is given in global ordering for all the dofs. 184 Otherwise, the ordering is global for the Nedelec field */ 185 order = pcbddc->nedorder; 186 conforming = pcbddc->conforming; 187 field = pcbddc->nedfield; 188 global = pcbddc->nedglobal; 189 setprimal = PETSC_FALSE; 190 print = PETSC_FALSE; 191 singular = PETSC_FALSE; 192 193 /* Command line customization */ 194 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 195 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsInt ("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 198 /* print debug info TODO: to be removed */ 199 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 200 ierr = PetscOptionsEnd();CHKERRQ(ierr); 201 202 /* Return if there are no edges in the decomposition and the problem is not singular */ 203 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 204 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 205 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 206 if (!singular) { 207 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 208 lrc[0] = PETSC_FALSE; 209 for (i=0;i<n;i++) { 210 if (PetscRealPart(vals[i]) > 2.) { 211 lrc[0] = PETSC_TRUE; 212 break; 213 } 214 } 215 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 216 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 217 if (!lrc[1]) PetscFunctionReturn(0); 218 } 219 220 /* Get Nedelec field */ 221 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 222 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 223 if (pcbddc->n_ISForDofsLocal && field >= 0) { 224 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 225 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 226 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 227 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 228 ne = n; 229 nedfieldlocal = NULL; 230 global = PETSC_TRUE; 231 } else if (field == PETSC_DECIDE) { 232 PetscInt rst,ren,*idx; 233 234 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 235 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 237 for (i=rst;i<ren;i++) { 238 PetscInt nc; 239 240 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 241 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 242 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 } 244 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 245 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 247 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 248 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 249 } else { 250 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 251 } 252 253 /* Sanity checks */ 254 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 255 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 256 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 257 258 /* Just set primal dofs and return */ 259 if (setprimal) { 260 IS enedfieldlocal; 261 PetscInt *eidxs; 262 263 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 264 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 265 if (nedfieldlocal) { 266 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 267 for (i=0,cum=0;i<ne;i++) { 268 if (PetscRealPart(vals[idxs[i]]) > 2.) { 269 eidxs[cum++] = idxs[i]; 270 } 271 } 272 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 273 } else { 274 for (i=0,cum=0;i<ne;i++) { 275 if (PetscRealPart(vals[i]) > 2.) { 276 eidxs[cum++] = i; 277 } 278 } 279 } 280 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 281 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 282 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 283 ierr = PetscFree(eidxs);CHKERRQ(ierr); 284 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 285 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 286 PetscFunctionReturn(0); 287 } 288 289 /* Compute some l2g maps */ 290 if (nedfieldlocal) { 291 IS is; 292 293 /* need to map from the local Nedelec field to local numbering */ 294 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 295 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 296 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 297 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 298 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 299 if (global) { 300 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 301 el2g = al2g; 302 } else { 303 IS gis; 304 305 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 306 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 307 ierr = ISDestroy(&gis);CHKERRQ(ierr); 308 } 309 ierr = ISDestroy(&is);CHKERRQ(ierr); 310 } else { 311 /* restore default */ 312 pcbddc->nedfield = -1; 313 /* one ref for the destruction of al2g, one for el2g */ 314 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 el2g = al2g; 317 fl2g = NULL; 318 } 319 320 /* Start communication to drop connections for interior edges (for cc analysis only) */ 321 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 322 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 323 if (nedfieldlocal) { 324 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 325 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 326 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 } else { 328 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 329 } 330 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 331 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 333 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 334 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 335 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 336 if (global) { 337 PetscInt rst; 338 339 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 340 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 341 if (matis->sf_rootdata[i] < 2) { 342 matis->sf_rootdata[cum++] = i + rst; 343 } 344 } 345 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 346 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 347 } else { 348 PetscInt *tbz; 349 350 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 351 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 352 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 354 for (i=0,cum=0;i<ne;i++) 355 if (matis->sf_leafdata[idxs[i]] == 1) 356 tbz[cum++] = i; 357 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 358 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 359 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 360 ierr = PetscFree(tbz);CHKERRQ(ierr); 361 } 362 } else { /* we need the entire G to infer the nullspace */ 363 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 364 G = pcbddc->discretegradient; 365 } 366 367 /* Extract subdomain relevant rows of G */ 368 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 369 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 370 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 371 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 372 ierr = ISDestroy(&lned);CHKERRQ(ierr); 373 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 374 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 375 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 376 377 /* SF for nodal dofs communications */ 378 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 379 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 380 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 381 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 382 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 384 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 386 i = singular ? 2 : 1; 387 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 388 389 /* Destroy temporary G created in MATIS format and modified G */ 390 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 391 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 392 ierr = MatDestroy(&G);CHKERRQ(ierr); 393 394 if (print) { 395 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 396 ierr = MatView(lG,NULL);CHKERRQ(ierr); 397 } 398 399 /* Save lG for values insertion in change of basis */ 400 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 401 402 /* Analyze the edge-nodes connections (duplicate lG) */ 403 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 404 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 405 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 406 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 409 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 410 /* need to import the boundary specification to ensure the 411 proper detection of coarse edges' endpoints */ 412 if (pcbddc->DirichletBoundariesLocal) { 413 IS is; 414 415 if (fl2g) { 416 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 417 } else { 418 is = pcbddc->DirichletBoundariesLocal; 419 } 420 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 421 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 422 for (i=0;i<cum;i++) { 423 if (idxs[i] >= 0) { 424 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 425 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 426 } 427 } 428 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 429 if (fl2g) { 430 ierr = ISDestroy(&is);CHKERRQ(ierr); 431 } 432 } 433 if (pcbddc->NeumannBoundariesLocal) { 434 IS is; 435 436 if (fl2g) { 437 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 438 } else { 439 is = pcbddc->NeumannBoundariesLocal; 440 } 441 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 442 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 443 for (i=0;i<cum;i++) { 444 if (idxs[i] >= 0) { 445 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 446 } 447 } 448 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 449 if (fl2g) { 450 ierr = ISDestroy(&is);CHKERRQ(ierr); 451 } 452 } 453 454 /* Count neighs per dof */ 455 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 456 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 458 for (i=1,cum=0;i<n_neigh;i++) { 459 cum += n_shared[i]; 460 for (j=0;j<n_shared[i];j++) { 461 ecount[shared[i][j]]++; 462 } 463 } 464 if (ne) { 465 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 466 } 467 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 468 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 469 for (i=1;i<n_neigh;i++) { 470 for (j=0;j<n_shared[i];j++) { 471 PetscInt k = shared[i][j]; 472 eneighs[k][ecount[k]] = neigh[i]; 473 ecount[k]++; 474 } 475 } 476 for (i=0;i<ne;i++) { 477 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 478 } 479 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 480 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 481 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 482 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 483 for (i=1,cum=0;i<n_neigh;i++) { 484 cum += n_shared[i]; 485 for (j=0;j<n_shared[i];j++) { 486 vcount[shared[i][j]]++; 487 } 488 } 489 if (nv) { 490 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 491 } 492 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 493 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 494 for (i=1;i<n_neigh;i++) { 495 for (j=0;j<n_shared[i];j++) { 496 PetscInt k = shared[i][j]; 497 vneighs[k][vcount[k]] = neigh[i]; 498 vcount[k]++; 499 } 500 } 501 for (i=0;i<nv;i++) { 502 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 503 } 504 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 505 506 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 507 for proper detection of coarse edges' endpoints */ 508 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 509 for (i=0;i<ne;i++) { 510 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 511 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 512 } 513 } 514 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 515 if (!conforming) { 516 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 517 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 518 } 519 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 520 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 521 cum = 0; 522 for (i=0;i<ne;i++) { 523 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 524 if (!PetscBTLookup(btee,i)) { 525 marks[cum++] = i; 526 continue; 527 } 528 /* set badly connected edge dofs as primal */ 529 if (!conforming) { 530 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 531 marks[cum++] = i; 532 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 533 for (j=ii[i];j<ii[i+1];j++) { 534 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 535 } 536 } else { 537 /* every edge dofs should be connected trough a certain number of nodal dofs 538 to other edge dofs belonging to coarse edges 539 - at most 2 endpoints 540 - order-1 interior nodal dofs 541 - no undefined nodal dofs (nconn < order) 542 */ 543 PetscInt ends = 0,ints = 0, undef = 0; 544 for (j=ii[i];j<ii[i+1];j++) { 545 PetscInt v = jj[j],k; 546 PetscInt nconn = iit[v+1]-iit[v]; 547 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 548 if (nconn > order) ends++; 549 else if (nconn == order) ints++; 550 else undef++; 551 } 552 if (undef || ends > 2 || ints != order -1) { 553 marks[cum++] = i; 554 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 555 for (j=ii[i];j<ii[i+1];j++) { 556 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 557 } 558 } 559 } 560 } 561 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 562 if (!order && ii[i+1] != ii[i]) { 563 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 564 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 565 } 566 } 567 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 568 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 569 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 570 if (!conforming) { 571 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 572 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 573 } 574 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 575 576 /* identify splitpoints and corner candidates */ 577 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 578 if (print) { 579 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 580 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 581 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 582 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 583 } 584 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 585 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 586 for (i=0;i<nv;i++) { 587 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 588 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 589 if (!order) { /* variable order */ 590 PetscReal vorder = 0.; 591 592 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 593 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 594 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 595 ord = 1; 596 } 597 #if defined(PETSC_USE_DEBUG) 598 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 599 #endif 600 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 601 if (PetscBTLookup(btbd,jj[j])) { 602 bdir = PETSC_TRUE; 603 break; 604 } 605 if (vc != ecount[jj[j]]) { 606 sneighs = PETSC_FALSE; 607 } else { 608 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 609 for (k=0;k<vc;k++) { 610 if (vn[k] != en[k]) { 611 sneighs = PETSC_FALSE; 612 break; 613 } 614 } 615 } 616 } 617 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 618 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 619 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 620 } else if (test == ord) { 621 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 622 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 623 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 624 } else { 625 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 626 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 627 } 628 } 629 } 630 ierr = PetscFree(ecount);CHKERRQ(ierr); 631 ierr = PetscFree(vcount);CHKERRQ(ierr); 632 if (ne) { 633 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 634 } 635 if (nv) { 636 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 637 } 638 ierr = PetscFree(eneighs);CHKERRQ(ierr); 639 ierr = PetscFree(vneighs);CHKERRQ(ierr); 640 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 641 642 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 643 if (order != 1) { 644 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 645 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 646 for (i=0;i<nv;i++) { 647 if (PetscBTLookup(btvcand,i)) { 648 PetscBool found = PETSC_FALSE; 649 for (j=ii[i];j<ii[i+1] && !found;j++) { 650 PetscInt k,e = jj[j]; 651 if (PetscBTLookup(bte,e)) continue; 652 for (k=iit[e];k<iit[e+1];k++) { 653 PetscInt v = jjt[k]; 654 if (v != i && PetscBTLookup(btvcand,v)) { 655 found = PETSC_TRUE; 656 break; 657 } 658 } 659 } 660 if (!found) { 661 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 662 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 663 } else { 664 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 665 } 666 } 667 } 668 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 669 } 670 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 671 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 672 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 673 674 /* Get the local G^T explicitly */ 675 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 676 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 677 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 678 679 /* Mark interior nodal dofs */ 680 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 681 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 682 for (i=1;i<n_neigh;i++) { 683 for (j=0;j<n_shared[i];j++) { 684 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 685 } 686 } 687 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 688 689 /* communicate corners and splitpoints */ 690 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 691 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 692 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 693 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 694 695 if (print) { 696 IS tbz; 697 698 cum = 0; 699 for (i=0;i<nv;i++) 700 if (sfvleaves[i]) 701 vmarks[cum++] = i; 702 703 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 704 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 705 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 706 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 707 } 708 709 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 710 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 711 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 712 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 713 714 /* Zero rows of lGt corresponding to identified corners 715 and interior nodal dofs */ 716 cum = 0; 717 for (i=0;i<nv;i++) { 718 if (sfvleaves[i]) { 719 vmarks[cum++] = i; 720 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 721 } 722 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 723 } 724 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 725 if (print) { 726 IS tbz; 727 728 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 729 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 730 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 731 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 732 } 733 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 734 ierr = PetscFree(vmarks);CHKERRQ(ierr); 735 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 736 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 737 738 /* Recompute G */ 739 ierr = MatDestroy(&lG);CHKERRQ(ierr); 740 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 741 if (print) { 742 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 743 ierr = MatView(lG,NULL);CHKERRQ(ierr); 744 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 745 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 746 } 747 748 /* Get primal dofs (if any) */ 749 cum = 0; 750 for (i=0;i<ne;i++) { 751 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 752 } 753 if (fl2g) { 754 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 755 } 756 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 757 if (print) { 758 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 759 ierr = ISView(primals,NULL);CHKERRQ(ierr); 760 } 761 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 762 /* TODO: what if the user passed in some of them ? */ 763 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 764 ierr = ISDestroy(&primals);CHKERRQ(ierr); 765 766 /* Compute edge connectivity */ 767 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 768 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 769 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 770 if (fl2g) { 771 PetscBT btf; 772 PetscInt *iia,*jja,*iiu,*jju; 773 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 774 775 /* create CSR for all local dofs */ 776 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 777 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 778 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 779 iiu = pcbddc->mat_graph->xadj; 780 jju = pcbddc->mat_graph->adjncy; 781 } else if (pcbddc->use_local_adj) { 782 rest = PETSC_TRUE; 783 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 784 } else { 785 free = PETSC_TRUE; 786 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 787 iiu[0] = 0; 788 for (i=0;i<n;i++) { 789 iiu[i+1] = i+1; 790 jju[i] = -1; 791 } 792 } 793 794 /* import sizes of CSR */ 795 iia[0] = 0; 796 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 797 798 /* overwrite entries corresponding to the Nedelec field */ 799 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 800 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 801 for (i=0;i<ne;i++) { 802 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 803 iia[idxs[i]+1] = ii[i+1]-ii[i]; 804 } 805 806 /* iia in CSR */ 807 for (i=0;i<n;i++) iia[i+1] += iia[i]; 808 809 /* jja in CSR */ 810 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 811 for (i=0;i<n;i++) 812 if (!PetscBTLookup(btf,i)) 813 for (j=0;j<iiu[i+1]-iiu[i];j++) 814 jja[iia[i]+j] = jju[iiu[i]+j]; 815 816 /* map edge dofs connectivity */ 817 if (jj) { 818 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 819 for (i=0;i<ne;i++) { 820 PetscInt e = idxs[i]; 821 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 822 } 823 } 824 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 825 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 826 if (rest) { 827 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 828 } 829 if (free) { 830 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 831 } 832 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 833 } else { 834 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 835 } 836 837 /* Analyze interface for edge dofs */ 838 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 839 pcbddc->mat_graph->twodim = PETSC_FALSE; 840 841 /* Get coarse edges in the edge space */ 842 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 843 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 844 845 if (fl2g) { 846 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 847 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 848 for (i=0;i<nee;i++) { 849 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 850 } 851 } else { 852 eedges = alleedges; 853 primals = allprimals; 854 } 855 856 /* Mark fine edge dofs with their coarse edge id */ 857 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 858 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 859 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 860 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 861 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 862 if (print) { 863 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 864 ierr = ISView(primals,NULL);CHKERRQ(ierr); 865 } 866 867 maxsize = 0; 868 for (i=0;i<nee;i++) { 869 PetscInt size,mark = i+1; 870 871 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 872 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 873 for (j=0;j<size;j++) marks[idxs[j]] = mark; 874 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 maxsize = PetscMax(maxsize,size); 876 } 877 878 /* Find coarse edge endpoints */ 879 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 880 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 881 for (i=0;i<nee;i++) { 882 PetscInt mark = i+1,size; 883 884 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 885 if (!size && nedfieldlocal) continue; 886 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 887 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 888 if (print) { 889 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 890 ISView(eedges[i],NULL); 891 } 892 for (j=0;j<size;j++) { 893 PetscInt k, ee = idxs[j]; 894 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 895 for (k=ii[ee];k<ii[ee+1];k++) { 896 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 897 if (PetscBTLookup(btv,jj[k])) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 899 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 900 PetscInt k2; 901 PetscBool corner = PETSC_FALSE; 902 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 903 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 904 /* it's a corner if either is connected with an edge dof belonging to a different cc or 905 if the edge dof lie on the natural part of the boundary */ 906 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 907 corner = PETSC_TRUE; 908 break; 909 } 910 } 911 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 912 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 913 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 914 } else { 915 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 916 } 917 } 918 } 919 } 920 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 921 } 922 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 923 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 924 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 925 926 /* Reset marked primal dofs */ 927 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 928 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 929 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 930 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 931 932 /* Now use the initial lG */ 933 ierr = MatDestroy(&lG);CHKERRQ(ierr); 934 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 935 lG = lGinit; 936 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 937 938 /* Compute extended cols indices */ 939 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 940 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 941 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 942 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 943 i *= maxsize; 944 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 945 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 946 eerr = PETSC_FALSE; 947 for (i=0;i<nee;i++) { 948 PetscInt size,found = 0; 949 950 cum = 0; 951 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 952 if (!size && nedfieldlocal) continue; 953 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 954 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 955 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 956 for (j=0;j<size;j++) { 957 PetscInt k,ee = idxs[j]; 958 for (k=ii[ee];k<ii[ee+1];k++) { 959 PetscInt vv = jj[k]; 960 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 961 else if (!PetscBTLookupSet(btvc,vv)) found++; 962 } 963 } 964 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 965 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 966 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 967 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 968 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 969 /* it may happen that endpoints are not defined at this point 970 if it is the case, mark this edge for a second pass */ 971 if (cum != size -1 || found != 2) { 972 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 973 if (print) { 974 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 975 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 976 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 977 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 978 } 979 eerr = PETSC_TRUE; 980 } 981 } 982 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 983 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 984 if (done) { 985 PetscInt *newprimals; 986 987 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 988 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 989 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 990 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 991 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 993 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 994 for (i=0;i<nee;i++) { 995 PetscBool has_candidates = PETSC_FALSE; 996 if (PetscBTLookup(bter,i)) { 997 PetscInt size,mark = i+1; 998 999 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1000 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1001 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1002 for (j=0;j<size;j++) { 1003 PetscInt k,ee = idxs[j]; 1004 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1005 for (k=ii[ee];k<ii[ee+1];k++) { 1006 /* set all candidates located on the edge as corners */ 1007 if (PetscBTLookup(btvcand,jj[k])) { 1008 PetscInt k2,vv = jj[k]; 1009 has_candidates = PETSC_TRUE; 1010 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1011 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1012 /* set all edge dofs connected to candidate as primals */ 1013 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1014 if (marks[jjt[k2]] == mark) { 1015 PetscInt k3,ee2 = jjt[k2]; 1016 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1017 newprimals[cum++] = ee2; 1018 /* finally set the new corners */ 1019 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1020 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1021 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1022 } 1023 } 1024 } 1025 } else { 1026 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1027 } 1028 } 1029 } 1030 if (!has_candidates) { /* circular edge */ 1031 PetscInt k, ee = idxs[0],*tmarks; 1032 1033 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1034 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1035 for (k=ii[ee];k<ii[ee+1];k++) { 1036 PetscInt k2; 1037 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1038 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1039 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1040 } 1041 for (j=0;j<size;j++) { 1042 if (tmarks[idxs[j]] > 1) { 1043 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1044 newprimals[cum++] = idxs[j]; 1045 } 1046 } 1047 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1048 } 1049 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1050 } 1051 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1052 } 1053 ierr = PetscFree(extcols);CHKERRQ(ierr); 1054 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1055 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1056 if (fl2g) { 1057 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1058 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1059 for (i=0;i<nee;i++) { 1060 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1061 } 1062 ierr = PetscFree(eedges);CHKERRQ(ierr); 1063 } 1064 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1065 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1066 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1067 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1068 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1069 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1070 pcbddc->mat_graph->twodim = PETSC_FALSE; 1071 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1072 if (fl2g) { 1073 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1074 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1075 for (i=0;i<nee;i++) { 1076 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1077 } 1078 } else { 1079 eedges = alleedges; 1080 primals = allprimals; 1081 } 1082 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1083 1084 /* Mark again */ 1085 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1086 for (i=0;i<nee;i++) { 1087 PetscInt size,mark = i+1; 1088 1089 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1090 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1091 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1092 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 } 1094 if (print) { 1095 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1096 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1097 } 1098 1099 /* Recompute extended cols */ 1100 eerr = PETSC_FALSE; 1101 for (i=0;i<nee;i++) { 1102 PetscInt size; 1103 1104 cum = 0; 1105 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1106 if (!size && nedfieldlocal) continue; 1107 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1108 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1109 for (j=0;j<size;j++) { 1110 PetscInt k,ee = idxs[j]; 1111 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1112 } 1113 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1114 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1115 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1116 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1117 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1118 if (cum != size -1) { 1119 if (print) { 1120 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1121 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1122 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1124 } 1125 eerr = PETSC_TRUE; 1126 } 1127 } 1128 } 1129 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1130 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1131 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1132 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1133 /* an error should not occur at this point */ 1134 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1135 1136 /* Check the number of endpoints */ 1137 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1138 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1139 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1140 for (i=0;i<nee;i++) { 1141 PetscInt size, found = 0, gc[2]; 1142 1143 /* init with defaults */ 1144 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1145 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1146 if (!size && nedfieldlocal) continue; 1147 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1148 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1149 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1150 for (j=0;j<size;j++) { 1151 PetscInt k,ee = idxs[j]; 1152 for (k=ii[ee];k<ii[ee+1];k++) { 1153 PetscInt vv = jj[k]; 1154 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1155 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1156 corners[i*2+found++] = vv; 1157 } 1158 } 1159 } 1160 if (found != 2) { 1161 PetscInt e; 1162 if (fl2g) { 1163 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1164 } else { 1165 e = idxs[0]; 1166 } 1167 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1168 } 1169 1170 /* get primal dof index on this coarse edge */ 1171 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1172 if (gc[0] > gc[1]) { 1173 PetscInt swap = corners[2*i]; 1174 corners[2*i] = corners[2*i+1]; 1175 corners[2*i+1] = swap; 1176 } 1177 cedges[i] = idxs[size-1]; 1178 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1179 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1180 } 1181 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1182 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1183 1184 #if defined(PETSC_USE_DEBUG) 1185 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1186 not interfere with neighbouring coarse edges */ 1187 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1188 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1189 for (i=0;i<nv;i++) { 1190 PetscInt emax = 0,eemax = 0; 1191 1192 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1193 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1194 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1195 for (j=1;j<nee+1;j++) { 1196 if (emax < emarks[j]) { 1197 emax = emarks[j]; 1198 eemax = j; 1199 } 1200 } 1201 /* not relevant for edges */ 1202 if (!eemax) continue; 1203 1204 for (j=ii[i];j<ii[i+1];j++) { 1205 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1206 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1207 } 1208 } 1209 } 1210 ierr = PetscFree(emarks);CHKERRQ(ierr); 1211 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1212 #endif 1213 1214 /* Compute extended rows indices for edge blocks of the change of basis */ 1215 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1216 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1217 extmem *= maxsize; 1218 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1219 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1220 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1221 for (i=0;i<nv;i++) { 1222 PetscInt mark = 0,size,start; 1223 1224 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1225 for (j=ii[i];j<ii[i+1];j++) 1226 if (marks[jj[j]] && !mark) 1227 mark = marks[jj[j]]; 1228 1229 /* not relevant */ 1230 if (!mark) continue; 1231 1232 /* import extended row */ 1233 mark--; 1234 start = mark*extmem+extrowcum[mark]; 1235 size = ii[i+1]-ii[i]; 1236 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1237 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1238 extrowcum[mark] += size; 1239 } 1240 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1241 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1242 ierr = PetscFree(marks);CHKERRQ(ierr); 1243 1244 /* Compress extrows */ 1245 cum = 0; 1246 for (i=0;i<nee;i++) { 1247 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1248 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1249 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1250 cum = PetscMax(cum,size); 1251 } 1252 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1253 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1254 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1255 1256 /* Workspace for lapack inner calls and VecSetValues */ 1257 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1258 1259 /* Create change of basis matrix (preallocation can be improved) */ 1260 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1261 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1262 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1263 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1264 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1265 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1266 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1267 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1268 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1269 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1270 1271 /* Defaults to identity */ 1272 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1273 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1274 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1275 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1276 1277 /* Create discrete gradient for the coarser level if needed */ 1278 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1279 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1280 if (pcbddc->current_level < pcbddc->max_levels) { 1281 ISLocalToGlobalMapping cel2g,cvl2g; 1282 IS wis,gwis; 1283 PetscInt cnv,cne; 1284 1285 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1286 if (fl2g) { 1287 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1288 } else { 1289 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1290 pcbddc->nedclocal = wis; 1291 } 1292 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1293 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1294 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1295 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1296 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1297 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1298 1299 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1300 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1301 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1302 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1303 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1304 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1305 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1306 1307 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1308 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1309 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1310 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1311 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1312 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1313 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1314 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1317 1318 #if defined(PRINT_GDET) 1319 inc = 0; 1320 lev = pcbddc->current_level; 1321 #endif 1322 1323 /* Insert values in the change of basis matrix */ 1324 for (i=0;i<nee;i++) { 1325 Mat Gins = NULL, GKins = NULL; 1326 IS cornersis = NULL; 1327 PetscScalar cvals[2]; 1328 1329 if (pcbddc->nedcG) { 1330 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1331 } 1332 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1333 if (Gins && GKins) { 1334 PetscScalar *data; 1335 const PetscInt *rows,*cols; 1336 PetscInt nrh,nch,nrc,ncc; 1337 1338 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1339 /* H1 */ 1340 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1341 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1342 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1343 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1344 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1346 /* complement */ 1347 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1348 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1349 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1350 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1351 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1352 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1353 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1354 1355 /* coarse discrete gradient */ 1356 if (pcbddc->nedcG) { 1357 PetscInt cols[2]; 1358 1359 cols[0] = 2*i; 1360 cols[1] = 2*i+1; 1361 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1362 } 1363 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1364 } 1365 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1366 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1367 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1368 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1369 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1370 } 1371 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1372 1373 /* Start assembling */ 1374 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1375 if (pcbddc->nedcG) { 1376 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 } 1378 1379 /* Free */ 1380 if (fl2g) { 1381 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1382 for (i=0;i<nee;i++) { 1383 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1384 } 1385 ierr = PetscFree(eedges);CHKERRQ(ierr); 1386 } 1387 1388 /* hack mat_graph with primal dofs on the coarse edges */ 1389 { 1390 PCBDDCGraph graph = pcbddc->mat_graph; 1391 PetscInt *oqueue = graph->queue; 1392 PetscInt *ocptr = graph->cptr; 1393 PetscInt ncc,*idxs; 1394 1395 /* find first primal edge */ 1396 if (pcbddc->nedclocal) { 1397 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1398 } else { 1399 if (fl2g) { 1400 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1401 } 1402 idxs = cedges; 1403 } 1404 cum = 0; 1405 while (cum < nee && cedges[cum] < 0) cum++; 1406 1407 /* adapt connected components */ 1408 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1409 graph->cptr[0] = 0; 1410 for (i=0,ncc=0;i<graph->ncc;i++) { 1411 PetscInt lc = ocptr[i+1]-ocptr[i]; 1412 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1413 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1414 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1415 ncc++; 1416 lc--; 1417 cum++; 1418 while (cum < nee && cedges[cum] < 0) cum++; 1419 } 1420 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1421 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1422 ncc++; 1423 } 1424 graph->ncc = ncc; 1425 if (pcbddc->nedclocal) { 1426 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1427 } 1428 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1429 } 1430 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1431 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1432 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1433 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1434 1435 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1436 ierr = PetscFree(extrow);CHKERRQ(ierr); 1437 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1438 ierr = PetscFree(corners);CHKERRQ(ierr); 1439 ierr = PetscFree(cedges);CHKERRQ(ierr); 1440 ierr = PetscFree(extrows);CHKERRQ(ierr); 1441 ierr = PetscFree(extcols);CHKERRQ(ierr); 1442 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1443 1444 /* Complete assembling */ 1445 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1446 if (pcbddc->nedcG) { 1447 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 #if 0 1449 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1450 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1451 #endif 1452 } 1453 1454 /* set change of basis */ 1455 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1456 ierr = MatDestroy(&T);CHKERRQ(ierr); 1457 1458 PetscFunctionReturn(0); 1459 } 1460 1461 /* the near-null space of BDDC carries information on quadrature weights, 1462 and these can be collinear -> so cheat with MatNullSpaceCreate 1463 and create a suitable set of basis vectors first */ 1464 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1465 { 1466 PetscErrorCode ierr; 1467 PetscInt i; 1468 1469 PetscFunctionBegin; 1470 for (i=0;i<nvecs;i++) { 1471 PetscInt first,last; 1472 1473 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1474 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1475 if (i>=first && i < last) { 1476 PetscScalar *data; 1477 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1478 if (!has_const) { 1479 data[i-first] = 1.; 1480 } else { 1481 data[2*i-first] = 1./PetscSqrtReal(2.); 1482 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1483 } 1484 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1485 } 1486 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1487 } 1488 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1489 for (i=0;i<nvecs;i++) { /* reset vectors */ 1490 PetscInt first,last; 1491 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1492 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1493 if (i>=first && i < last) { 1494 PetscScalar *data; 1495 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1496 if (!has_const) { 1497 data[i-first] = 0.; 1498 } else { 1499 data[2*i-first] = 0.; 1500 data[2*i-first+1] = 0.; 1501 } 1502 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1503 } 1504 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1505 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1506 } 1507 PetscFunctionReturn(0); 1508 } 1509 1510 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1511 { 1512 Mat loc_divudotp; 1513 Vec p,v,vins,quad_vec,*quad_vecs; 1514 ISLocalToGlobalMapping map; 1515 IS *faces,*edges; 1516 PetscScalar *vals; 1517 const PetscScalar *array; 1518 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1519 PetscMPIInt rank; 1520 PetscErrorCode ierr; 1521 1522 PetscFunctionBegin; 1523 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1524 if (graph->twodim) { 1525 lmaxneighs = 2; 1526 } else { 1527 lmaxneighs = 1; 1528 for (i=0;i<ne;i++) { 1529 const PetscInt *idxs; 1530 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1531 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1532 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 } 1534 lmaxneighs++; /* graph count does not include self */ 1535 } 1536 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1537 maxsize = 0; 1538 for (i=0;i<ne;i++) { 1539 PetscInt nn; 1540 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1541 maxsize = PetscMax(maxsize,nn); 1542 } 1543 for (i=0;i<nf;i++) { 1544 PetscInt nn; 1545 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1546 maxsize = PetscMax(maxsize,nn); 1547 } 1548 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1549 /* create vectors to hold quadrature weights */ 1550 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1551 if (!transpose) { 1552 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1553 } else { 1554 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1555 } 1556 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1557 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1558 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1559 for (i=0;i<maxneighs;i++) { 1560 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1561 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1562 } 1563 1564 /* compute local quad vec */ 1565 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1566 if (!transpose) { 1567 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1568 } else { 1569 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1570 } 1571 ierr = VecSet(p,1.);CHKERRQ(ierr); 1572 if (!transpose) { 1573 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1574 } else { 1575 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } 1577 if (vl2l) { 1578 Mat lA; 1579 VecScatter sc; 1580 1581 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1582 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1583 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1584 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1585 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1587 } else { 1588 vins = v; 1589 } 1590 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1591 ierr = VecDestroy(&p);CHKERRQ(ierr); 1592 1593 /* insert in global quadrature vecs */ 1594 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1595 for (i=0;i<nf;i++) { 1596 const PetscInt *idxs; 1597 PetscInt idx,nn,j; 1598 1599 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1600 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1601 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1602 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1603 idx = -(idx+1); 1604 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1605 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1606 } 1607 for (i=0;i<ne;i++) { 1608 const PetscInt *idxs; 1609 PetscInt idx,nn,j; 1610 1611 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1612 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1613 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1614 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1615 idx = -(idx+1); 1616 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1617 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1618 } 1619 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1620 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1621 if (vl2l) { 1622 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1623 } 1624 ierr = VecDestroy(&v);CHKERRQ(ierr); 1625 ierr = PetscFree(vals);CHKERRQ(ierr); 1626 1627 /* assemble near null space */ 1628 for (i=0;i<maxneighs;i++) { 1629 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1630 } 1631 for (i=0;i<maxneighs;i++) { 1632 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1633 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1634 } 1635 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1636 PetscFunctionReturn(0); 1637 } 1638 1639 1640 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1641 { 1642 PetscErrorCode ierr; 1643 Vec local,global; 1644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1645 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1646 1647 PetscFunctionBegin; 1648 /* need to convert from global to local topology information and remove references to information in global ordering */ 1649 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1650 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1651 if (pcbddc->user_provided_isfordofs) { 1652 if (pcbddc->n_ISForDofs) { 1653 PetscInt i; 1654 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1655 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1656 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1657 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1658 } 1659 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1660 pcbddc->n_ISForDofs = 0; 1661 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1662 } 1663 } else { 1664 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1665 DM dm; 1666 1667 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1668 if (!dm) { 1669 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1670 } 1671 if (dm) { 1672 IS *fields; 1673 PetscInt nf,i; 1674 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1675 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1676 for (i=0;i<nf;i++) { 1677 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1678 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1679 } 1680 ierr = PetscFree(fields);CHKERRQ(ierr); 1681 pcbddc->n_ISForDofsLocal = nf; 1682 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1683 PetscContainer c; 1684 1685 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1686 if (c) { 1687 MatISLocalFields lf; 1688 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1689 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1690 } else { /* fallback, create the default fields if bs > 1 */ 1691 PetscInt i, n = matis->A->rmap->n; 1692 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1693 if (i > 1) { 1694 pcbddc->n_ISForDofsLocal = i; 1695 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1696 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1697 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1698 } 1699 } 1700 } 1701 } 1702 } else { 1703 PetscInt i; 1704 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1705 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1706 } 1707 } 1708 } 1709 1710 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1711 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1712 } else if (pcbddc->DirichletBoundariesLocal) { 1713 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1714 } 1715 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1716 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1717 } else if (pcbddc->NeumannBoundariesLocal) { 1718 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1719 } 1720 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1721 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1722 } 1723 ierr = VecDestroy(&global);CHKERRQ(ierr); 1724 ierr = VecDestroy(&local);CHKERRQ(ierr); 1725 1726 PetscFunctionReturn(0); 1727 } 1728 1729 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1730 { 1731 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1732 PetscErrorCode ierr; 1733 IS nis; 1734 const PetscInt *idxs; 1735 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1736 PetscBool *ld; 1737 1738 PetscFunctionBegin; 1739 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1740 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1741 if (mop == MPI_LAND) { 1742 /* init rootdata with true */ 1743 ld = (PetscBool*) matis->sf_rootdata; 1744 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1745 } else { 1746 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1747 } 1748 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1749 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1750 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1751 ld = (PetscBool*) matis->sf_leafdata; 1752 for (i=0;i<nd;i++) 1753 if (-1 < idxs[i] && idxs[i] < n) 1754 ld[idxs[i]] = PETSC_TRUE; 1755 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1756 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1757 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1758 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1759 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1760 if (mop == MPI_LAND) { 1761 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1762 } else { 1763 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1764 } 1765 for (i=0,nnd=0;i<n;i++) 1766 if (ld[i]) 1767 nidxs[nnd++] = i; 1768 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1769 ierr = ISDestroy(is);CHKERRQ(ierr); 1770 *is = nis; 1771 PetscFunctionReturn(0); 1772 } 1773 1774 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1775 { 1776 PC_IS *pcis = (PC_IS*)(pc->data); 1777 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1778 PetscErrorCode ierr; 1779 1780 PetscFunctionBegin; 1781 if (!pcbddc->benign_have_null) { 1782 PetscFunctionReturn(0); 1783 } 1784 if (pcbddc->ChangeOfBasisMatrix) { 1785 Vec swap; 1786 1787 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1788 swap = pcbddc->work_change; 1789 pcbddc->work_change = r; 1790 r = swap; 1791 } 1792 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1793 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1794 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1795 ierr = VecSet(z,0.);CHKERRQ(ierr); 1796 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1797 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1798 if (pcbddc->ChangeOfBasisMatrix) { 1799 pcbddc->work_change = r; 1800 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1801 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1802 } 1803 PetscFunctionReturn(0); 1804 } 1805 1806 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1807 { 1808 PCBDDCBenignMatMult_ctx ctx; 1809 PetscErrorCode ierr; 1810 PetscBool apply_right,apply_left,reset_x; 1811 1812 PetscFunctionBegin; 1813 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1814 if (transpose) { 1815 apply_right = ctx->apply_left; 1816 apply_left = ctx->apply_right; 1817 } else { 1818 apply_right = ctx->apply_right; 1819 apply_left = ctx->apply_left; 1820 } 1821 reset_x = PETSC_FALSE; 1822 if (apply_right) { 1823 const PetscScalar *ax; 1824 PetscInt nl,i; 1825 1826 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1827 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1828 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1829 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1830 for (i=0;i<ctx->benign_n;i++) { 1831 PetscScalar sum,val; 1832 const PetscInt *idxs; 1833 PetscInt nz,j; 1834 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1835 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1836 sum = 0.; 1837 if (ctx->apply_p0) { 1838 val = ctx->work[idxs[nz-1]]; 1839 for (j=0;j<nz-1;j++) { 1840 sum += ctx->work[idxs[j]]; 1841 ctx->work[idxs[j]] += val; 1842 } 1843 } else { 1844 for (j=0;j<nz-1;j++) { 1845 sum += ctx->work[idxs[j]]; 1846 } 1847 } 1848 ctx->work[idxs[nz-1]] -= sum; 1849 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1850 } 1851 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1852 reset_x = PETSC_TRUE; 1853 } 1854 if (transpose) { 1855 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1856 } else { 1857 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1858 } 1859 if (reset_x) { 1860 ierr = VecResetArray(x);CHKERRQ(ierr); 1861 } 1862 if (apply_left) { 1863 PetscScalar *ay; 1864 PetscInt i; 1865 1866 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1867 for (i=0;i<ctx->benign_n;i++) { 1868 PetscScalar sum,val; 1869 const PetscInt *idxs; 1870 PetscInt nz,j; 1871 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1872 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1873 val = -ay[idxs[nz-1]]; 1874 if (ctx->apply_p0) { 1875 sum = 0.; 1876 for (j=0;j<nz-1;j++) { 1877 sum += ay[idxs[j]]; 1878 ay[idxs[j]] += val; 1879 } 1880 ay[idxs[nz-1]] += sum; 1881 } else { 1882 for (j=0;j<nz-1;j++) { 1883 ay[idxs[j]] += val; 1884 } 1885 ay[idxs[nz-1]] = 0.; 1886 } 1887 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1888 } 1889 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1890 } 1891 PetscFunctionReturn(0); 1892 } 1893 1894 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1895 { 1896 PetscErrorCode ierr; 1897 1898 PetscFunctionBegin; 1899 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1900 PetscFunctionReturn(0); 1901 } 1902 1903 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1904 { 1905 PetscErrorCode ierr; 1906 1907 PetscFunctionBegin; 1908 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1909 PetscFunctionReturn(0); 1910 } 1911 1912 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1913 { 1914 PC_IS *pcis = (PC_IS*)pc->data; 1915 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1916 PCBDDCBenignMatMult_ctx ctx; 1917 PetscErrorCode ierr; 1918 1919 PetscFunctionBegin; 1920 if (!restore) { 1921 Mat A_IB,A_BI; 1922 PetscScalar *work; 1923 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1924 1925 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1926 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1927 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1928 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1929 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1930 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1931 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1932 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1933 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1934 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1935 ctx->apply_left = PETSC_TRUE; 1936 ctx->apply_right = PETSC_FALSE; 1937 ctx->apply_p0 = PETSC_FALSE; 1938 ctx->benign_n = pcbddc->benign_n; 1939 if (reuse) { 1940 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1941 ctx->free = PETSC_FALSE; 1942 } else { /* TODO: could be optimized for successive solves */ 1943 ISLocalToGlobalMapping N_to_D; 1944 PetscInt i; 1945 1946 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1947 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1948 for (i=0;i<pcbddc->benign_n;i++) { 1949 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1950 } 1951 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 1952 ctx->free = PETSC_TRUE; 1953 } 1954 ctx->A = pcis->A_IB; 1955 ctx->work = work; 1956 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 1957 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1958 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1959 pcis->A_IB = A_IB; 1960 1961 /* A_BI as A_IB^T */ 1962 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 1963 pcbddc->benign_original_mat = pcis->A_BI; 1964 pcis->A_BI = A_BI; 1965 } else { 1966 if (!pcbddc->benign_original_mat) { 1967 PetscFunctionReturn(0); 1968 } 1969 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 1970 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 1971 pcis->A_IB = ctx->A; 1972 ctx->A = NULL; 1973 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 1974 pcis->A_BI = pcbddc->benign_original_mat; 1975 pcbddc->benign_original_mat = NULL; 1976 if (ctx->free) { 1977 PetscInt i; 1978 for (i=0;i<ctx->benign_n;i++) { 1979 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 1980 } 1981 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 1982 } 1983 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 1984 ierr = PetscFree(ctx);CHKERRQ(ierr); 1985 } 1986 PetscFunctionReturn(0); 1987 } 1988 1989 /* used just in bddc debug mode */ 1990 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 1991 { 1992 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1993 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1994 Mat An; 1995 PetscErrorCode ierr; 1996 1997 PetscFunctionBegin; 1998 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 1999 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2000 if (is1) { 2001 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2002 ierr = MatDestroy(&An);CHKERRQ(ierr); 2003 } else { 2004 *B = An; 2005 } 2006 PetscFunctionReturn(0); 2007 } 2008 2009 /* TODO: add reuse flag */ 2010 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2011 { 2012 Mat Bt; 2013 PetscScalar *a,*bdata; 2014 const PetscInt *ii,*ij; 2015 PetscInt m,n,i,nnz,*bii,*bij; 2016 PetscBool flg_row; 2017 PetscErrorCode ierr; 2018 2019 PetscFunctionBegin; 2020 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2021 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2022 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2023 nnz = n; 2024 for (i=0;i<ii[n];i++) { 2025 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2026 } 2027 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2028 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2029 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2030 nnz = 0; 2031 bii[0] = 0; 2032 for (i=0;i<n;i++) { 2033 PetscInt j; 2034 for (j=ii[i];j<ii[i+1];j++) { 2035 PetscScalar entry = a[j]; 2036 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2037 bij[nnz] = ij[j]; 2038 bdata[nnz] = entry; 2039 nnz++; 2040 } 2041 } 2042 bii[i+1] = nnz; 2043 } 2044 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2045 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2046 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2047 { 2048 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2049 b->free_a = PETSC_TRUE; 2050 b->free_ij = PETSC_TRUE; 2051 } 2052 *B = Bt; 2053 PetscFunctionReturn(0); 2054 } 2055 2056 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2057 { 2058 Mat B = NULL; 2059 DM dm; 2060 IS is_dummy,*cc_n; 2061 ISLocalToGlobalMapping l2gmap_dummy; 2062 PCBDDCGraph graph; 2063 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2064 PetscInt i,n; 2065 PetscInt *xadj,*adjncy; 2066 PetscBool isplex = PETSC_FALSE; 2067 PetscErrorCode ierr; 2068 2069 PetscFunctionBegin; 2070 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2071 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2072 if (!dm) { 2073 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2074 } 2075 if (dm) { 2076 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2077 } 2078 if (isplex) { /* this code has been modified from plexpartition.c */ 2079 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2080 PetscInt *adj = NULL; 2081 IS cellNumbering; 2082 const PetscInt *cellNum; 2083 PetscBool useCone, useClosure; 2084 PetscSection section; 2085 PetscSegBuffer adjBuffer; 2086 PetscSF sfPoint; 2087 PetscErrorCode ierr; 2088 2089 PetscFunctionBegin; 2090 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2091 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2092 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2093 /* Build adjacency graph via a section/segbuffer */ 2094 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2095 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2096 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2097 /* Always use FVM adjacency to create partitioner graph */ 2098 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2099 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2100 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2101 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2102 ierr = DMPlexCreateCellNumbering_Internal(dm, PETSC_TRUE, &cellNumbering);CHKERRQ(ierr); 2103 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2104 for (n = 0, p = pStart; p < pEnd; p++) { 2105 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2106 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2107 adjSize = PETSC_DETERMINE; 2108 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2109 for (a = 0; a < adjSize; ++a) { 2110 const PetscInt point = adj[a]; 2111 if (point != p && pStart <= point && point < pEnd) { 2112 PetscInt *PETSC_RESTRICT pBuf; 2113 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2114 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2115 *pBuf = point; 2116 } 2117 } 2118 n++; 2119 } 2120 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2121 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2122 /* Derive CSR graph from section/segbuffer */ 2123 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2124 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2125 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2126 for (idx = 0, p = pStart; p < pEnd; p++) { 2127 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2128 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2129 } 2130 xadj[n] = size; 2131 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2132 /* Clean up */ 2133 ierr = ISDestroy(&cellNumbering);CHKERRQ(ierr); 2134 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2135 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2136 ierr = PetscFree(adj);CHKERRQ(ierr); 2137 graph->xadj = xadj; 2138 graph->adjncy = adjncy; 2139 } else { 2140 Mat A; 2141 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2142 2143 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2144 if (!A->rmap->N || !A->cmap->N) { 2145 *ncc = 0; 2146 *cc = NULL; 2147 PetscFunctionReturn(0); 2148 } 2149 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2150 if (!isseqaij && filter) { 2151 PetscBool isseqdense; 2152 2153 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2154 if (!isseqdense) { 2155 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2156 } else { /* TODO: rectangular case and LDA */ 2157 PetscScalar *array; 2158 PetscReal chop=1.e-6; 2159 2160 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2161 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2162 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2163 for (i=0;i<n;i++) { 2164 PetscInt j; 2165 for (j=i+1;j<n;j++) { 2166 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2167 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2168 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2169 } 2170 } 2171 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2172 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2173 } 2174 } else { 2175 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2176 B = A; 2177 } 2178 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2179 2180 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2181 if (filter) { 2182 PetscScalar *data; 2183 PetscInt j,cum; 2184 2185 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2186 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2187 cum = 0; 2188 for (i=0;i<n;i++) { 2189 PetscInt t; 2190 2191 for (j=xadj[i];j<xadj[i+1];j++) { 2192 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2193 continue; 2194 } 2195 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2196 } 2197 t = xadj_filtered[i]; 2198 xadj_filtered[i] = cum; 2199 cum += t; 2200 } 2201 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2202 graph->xadj = xadj_filtered; 2203 graph->adjncy = adjncy_filtered; 2204 } else { 2205 graph->xadj = xadj; 2206 graph->adjncy = adjncy; 2207 } 2208 } 2209 /* compute local connected components using PCBDDCGraph */ 2210 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2211 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2212 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2213 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2214 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2215 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2216 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2217 2218 /* partial clean up */ 2219 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2220 if (B) { 2221 PetscBool flg_row; 2222 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2223 ierr = MatDestroy(&B);CHKERRQ(ierr); 2224 } 2225 if (isplex) { 2226 ierr = PetscFree(xadj);CHKERRQ(ierr); 2227 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2228 } 2229 2230 /* get back data */ 2231 if (isplex) { 2232 if (ncc) *ncc = graph->ncc; 2233 if (cc || primalv) { 2234 Mat A; 2235 PetscBT btv,btvt; 2236 PetscSection subSection; 2237 PetscInt *ids,cum,cump,*cids,*pids; 2238 2239 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2240 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2241 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2242 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2243 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2244 2245 cids[0] = 0; 2246 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2247 PetscInt j; 2248 2249 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2250 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2251 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2252 2253 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2254 for (k = 0; k < 2*size; k += 2) { 2255 PetscInt s, p = closure[k], off, dof, cdof; 2256 2257 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2258 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2259 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2260 for (s = 0; s < dof-cdof; s++) { 2261 if (PetscBTLookupSet(btvt,off+s)) continue; 2262 if (!PetscBTLookup(btv,off+s)) { 2263 ids[cum++] = off+s; 2264 } else { /* cross-vertex */ 2265 pids[cump++] = off+s; 2266 } 2267 } 2268 } 2269 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2270 } 2271 cids[i+1] = cum; 2272 /* mark dofs as already assigned */ 2273 for (j = cids[i]; j < cids[i+1]; j++) { 2274 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2275 } 2276 } 2277 if (cc) { 2278 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2279 for (i = 0; i < graph->ncc; i++) { 2280 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2281 } 2282 *cc = cc_n; 2283 } 2284 if (primalv) { 2285 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2286 } 2287 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2288 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2289 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2290 } 2291 } else { 2292 if (ncc) *ncc = graph->ncc; 2293 if (cc) { 2294 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2295 for (i=0;i<graph->ncc;i++) { 2296 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2297 } 2298 *cc = cc_n; 2299 } 2300 if (primalv) *primalv = NULL; 2301 } 2302 /* clean up graph */ 2303 graph->xadj = 0; 2304 graph->adjncy = 0; 2305 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2306 PetscFunctionReturn(0); 2307 } 2308 2309 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2310 { 2311 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2312 PC_IS* pcis = (PC_IS*)(pc->data); 2313 IS dirIS = NULL; 2314 PetscInt i; 2315 PetscErrorCode ierr; 2316 2317 PetscFunctionBegin; 2318 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2319 if (zerodiag) { 2320 Mat A; 2321 Vec vec3_N; 2322 PetscScalar *vals; 2323 const PetscInt *idxs; 2324 PetscInt nz,*count; 2325 2326 /* p0 */ 2327 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2328 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2329 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2330 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2331 for (i=0;i<nz;i++) vals[i] = 1.; 2332 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2333 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2334 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2335 /* v_I */ 2336 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2337 for (i=0;i<nz;i++) vals[i] = 0.; 2338 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2339 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2340 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2341 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2342 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2343 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2344 if (dirIS) { 2345 PetscInt n; 2346 2347 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2348 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2349 for (i=0;i<n;i++) vals[i] = 0.; 2350 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2351 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2352 } 2353 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2354 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2355 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2356 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2357 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2358 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2359 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2360 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2361 ierr = PetscFree(vals);CHKERRQ(ierr); 2362 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2363 2364 /* there should not be any pressure dofs lying on the interface */ 2365 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2366 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2367 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2368 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2369 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2370 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2371 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2372 ierr = PetscFree(count);CHKERRQ(ierr); 2373 } 2374 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2375 2376 /* check PCBDDCBenignGetOrSetP0 */ 2377 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2378 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2379 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2380 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2381 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2382 for (i=0;i<pcbddc->benign_n;i++) { 2383 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2384 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i);CHKERRQ(ierr); 2385 } 2386 PetscFunctionReturn(0); 2387 } 2388 2389 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2390 { 2391 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2392 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2393 PetscInt nz,n; 2394 PetscInt *interior_dofs,n_interior_dofs,nneu; 2395 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2400 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2401 for (n=0;n<pcbddc->benign_n;n++) { 2402 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2403 } 2404 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2405 pcbddc->benign_n = 0; 2406 2407 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2408 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2409 Checks if all the pressure dofs in each subdomain have a zero diagonal 2410 If not, a change of basis on pressures is not needed 2411 since the local Schur complements are already SPD 2412 */ 2413 has_null_pressures = PETSC_TRUE; 2414 have_null = PETSC_TRUE; 2415 if (pcbddc->n_ISForDofsLocal) { 2416 IS iP = NULL; 2417 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2418 2419 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2420 ierr = PetscOptionsInt ("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2421 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2422 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2423 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2424 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2425 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2426 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2427 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2428 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2429 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2430 if (iP) { 2431 IS newpressures; 2432 2433 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2434 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2435 pressures = newpressures; 2436 } 2437 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2438 if (!sorted) { 2439 ierr = ISSort(pressures);CHKERRQ(ierr); 2440 } 2441 } else { 2442 pressures = NULL; 2443 } 2444 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2445 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2446 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2447 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2448 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2449 if (!sorted) { 2450 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2451 } 2452 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2453 zerodiag_save = zerodiag; 2454 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2455 if (!nz) { 2456 if (n) have_null = PETSC_FALSE; 2457 has_null_pressures = PETSC_FALSE; 2458 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2459 } 2460 recompute_zerodiag = PETSC_FALSE; 2461 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2462 zerodiag_subs = NULL; 2463 pcbddc->benign_n = 0; 2464 n_interior_dofs = 0; 2465 interior_dofs = NULL; 2466 nneu = 0; 2467 if (pcbddc->NeumannBoundariesLocal) { 2468 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2469 } 2470 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2471 if (checkb) { /* need to compute interior nodes */ 2472 PetscInt n,i,j; 2473 PetscInt n_neigh,*neigh,*n_shared,**shared; 2474 PetscInt *iwork; 2475 2476 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2477 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2478 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2479 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2480 for (i=1;i<n_neigh;i++) 2481 for (j=0;j<n_shared[i];j++) 2482 iwork[shared[i][j]] += 1; 2483 for (i=0;i<n;i++) 2484 if (!iwork[i]) 2485 interior_dofs[n_interior_dofs++] = i; 2486 ierr = PetscFree(iwork);CHKERRQ(ierr); 2487 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2488 } 2489 if (has_null_pressures) { 2490 IS *subs; 2491 PetscInt nsubs,i,j,nl; 2492 const PetscInt *idxs; 2493 PetscScalar *array; 2494 Vec *work; 2495 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2496 2497 subs = pcbddc->local_subs; 2498 nsubs = pcbddc->n_local_subs; 2499 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2500 if (checkb) { 2501 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2502 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2503 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2504 /* work[0] = 1_p */ 2505 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2506 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2507 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2508 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2509 /* work[0] = 1_v */ 2510 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2511 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2512 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2513 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2514 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2515 } 2516 if (nsubs > 1) { 2517 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2518 for (i=0;i<nsubs;i++) { 2519 ISLocalToGlobalMapping l2g; 2520 IS t_zerodiag_subs; 2521 PetscInt nl; 2522 2523 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2524 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2525 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2526 if (nl) { 2527 PetscBool valid = PETSC_TRUE; 2528 2529 if (checkb) { 2530 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2531 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2532 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2533 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2534 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2535 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2536 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2537 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2538 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2539 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2540 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2541 for (j=0;j<n_interior_dofs;j++) { 2542 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2543 valid = PETSC_FALSE; 2544 break; 2545 } 2546 } 2547 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2548 } 2549 if (valid && nneu) { 2550 const PetscInt *idxs; 2551 PetscInt nzb; 2552 2553 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2554 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2555 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2556 if (nzb) valid = PETSC_FALSE; 2557 } 2558 if (valid && pressures) { 2559 IS t_pressure_subs; 2560 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2561 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2562 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2563 } 2564 if (valid) { 2565 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2566 pcbddc->benign_n++; 2567 } else { 2568 recompute_zerodiag = PETSC_TRUE; 2569 } 2570 } 2571 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2572 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2573 } 2574 } else { /* there's just one subdomain (or zero if they have not been detected */ 2575 PetscBool valid = PETSC_TRUE; 2576 2577 if (nneu) valid = PETSC_FALSE; 2578 if (valid && pressures) { 2579 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2580 } 2581 if (valid && checkb) { 2582 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2583 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2584 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2585 for (j=0;j<n_interior_dofs;j++) { 2586 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2587 valid = PETSC_FALSE; 2588 break; 2589 } 2590 } 2591 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2592 } 2593 if (valid) { 2594 pcbddc->benign_n = 1; 2595 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2596 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2597 zerodiag_subs[0] = zerodiag; 2598 } 2599 } 2600 if (checkb) { 2601 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2602 } 2603 } 2604 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2605 2606 if (!pcbddc->benign_n) { 2607 PetscInt n; 2608 2609 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2610 recompute_zerodiag = PETSC_FALSE; 2611 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2612 if (n) { 2613 has_null_pressures = PETSC_FALSE; 2614 have_null = PETSC_FALSE; 2615 } 2616 } 2617 2618 /* final check for null pressures */ 2619 if (zerodiag && pressures) { 2620 PetscInt nz,np; 2621 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2622 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2623 if (nz != np) have_null = PETSC_FALSE; 2624 } 2625 2626 if (recompute_zerodiag) { 2627 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2628 if (pcbddc->benign_n == 1) { 2629 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2630 zerodiag = zerodiag_subs[0]; 2631 } else { 2632 PetscInt i,nzn,*new_idxs; 2633 2634 nzn = 0; 2635 for (i=0;i<pcbddc->benign_n;i++) { 2636 PetscInt ns; 2637 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2638 nzn += ns; 2639 } 2640 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2641 nzn = 0; 2642 for (i=0;i<pcbddc->benign_n;i++) { 2643 PetscInt ns,*idxs; 2644 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2645 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2646 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2647 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2648 nzn += ns; 2649 } 2650 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2651 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2652 } 2653 have_null = PETSC_FALSE; 2654 } 2655 2656 /* Prepare matrix to compute no-net-flux */ 2657 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2658 Mat A,loc_divudotp; 2659 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2660 IS row,col,isused = NULL; 2661 PetscInt M,N,n,st,n_isused; 2662 2663 if (pressures) { 2664 isused = pressures; 2665 } else { 2666 isused = zerodiag_save; 2667 } 2668 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2669 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2670 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2671 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2672 n_isused = 0; 2673 if (isused) { 2674 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2675 } 2676 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2677 st = st-n_isused; 2678 if (n) { 2679 const PetscInt *gidxs; 2680 2681 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2682 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2683 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2684 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2685 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2686 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2687 } else { 2688 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2689 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2690 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2691 } 2692 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2693 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2694 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2695 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2696 ierr = ISDestroy(&row);CHKERRQ(ierr); 2697 ierr = ISDestroy(&col);CHKERRQ(ierr); 2698 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2699 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2700 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2701 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2702 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2703 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2704 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2705 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2706 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2707 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2708 } 2709 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2710 2711 /* change of basis and p0 dofs */ 2712 if (has_null_pressures) { 2713 IS zerodiagc; 2714 const PetscInt *idxs,*idxsc; 2715 PetscInt i,s,*nnz; 2716 2717 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2718 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2719 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2720 /* local change of basis for pressures */ 2721 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2722 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2723 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2724 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2725 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2726 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2727 for (i=0;i<pcbddc->benign_n;i++) { 2728 PetscInt nzs,j; 2729 2730 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2731 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2732 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2733 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2734 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2735 } 2736 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2737 ierr = PetscFree(nnz);CHKERRQ(ierr); 2738 /* set identity on velocities */ 2739 for (i=0;i<n-nz;i++) { 2740 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2741 } 2742 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2743 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2744 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2745 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2746 /* set change on pressures */ 2747 for (s=0;s<pcbddc->benign_n;s++) { 2748 PetscScalar *array; 2749 PetscInt nzs; 2750 2751 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2752 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2753 for (i=0;i<nzs-1;i++) { 2754 PetscScalar vals[2]; 2755 PetscInt cols[2]; 2756 2757 cols[0] = idxs[i]; 2758 cols[1] = idxs[nzs-1]; 2759 vals[0] = 1.; 2760 vals[1] = 1.; 2761 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2762 } 2763 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2764 for (i=0;i<nzs-1;i++) array[i] = -1.; 2765 array[nzs-1] = 1.; 2766 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2767 /* store local idxs for p0 */ 2768 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2769 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2770 ierr = PetscFree(array);CHKERRQ(ierr); 2771 } 2772 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2773 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2774 /* project if needed */ 2775 if (pcbddc->benign_change_explicit) { 2776 Mat M; 2777 2778 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2779 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2780 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2781 ierr = MatDestroy(&M);CHKERRQ(ierr); 2782 } 2783 /* store global idxs for p0 */ 2784 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2785 } 2786 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2787 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2788 2789 /* determines if the coarse solver will be singular or not */ 2790 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2791 /* determines if the problem has subdomains with 0 pressure block */ 2792 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2793 *zerodiaglocal = zerodiag; 2794 PetscFunctionReturn(0); 2795 } 2796 2797 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2798 { 2799 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2800 PetscScalar *array; 2801 PetscErrorCode ierr; 2802 2803 PetscFunctionBegin; 2804 if (!pcbddc->benign_sf) { 2805 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2806 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2807 } 2808 if (get) { 2809 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2810 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2811 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2812 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2813 } else { 2814 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2815 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2816 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2817 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2818 } 2819 PetscFunctionReturn(0); 2820 } 2821 2822 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2823 { 2824 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2825 PetscErrorCode ierr; 2826 2827 PetscFunctionBegin; 2828 /* TODO: add error checking 2829 - avoid nested pop (or push) calls. 2830 - cannot push before pop. 2831 - cannot call this if pcbddc->local_mat is NULL 2832 */ 2833 if (!pcbddc->benign_n) { 2834 PetscFunctionReturn(0); 2835 } 2836 if (pop) { 2837 if (pcbddc->benign_change_explicit) { 2838 IS is_p0; 2839 MatReuse reuse; 2840 2841 /* extract B_0 */ 2842 reuse = MAT_INITIAL_MATRIX; 2843 if (pcbddc->benign_B0) { 2844 reuse = MAT_REUSE_MATRIX; 2845 } 2846 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2847 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2848 /* remove rows and cols from local problem */ 2849 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2850 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2851 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2852 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2853 } else { 2854 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2855 PetscScalar *vals; 2856 PetscInt i,n,*idxs_ins; 2857 2858 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2859 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2860 if (!pcbddc->benign_B0) { 2861 PetscInt *nnz; 2862 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2863 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2864 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2865 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2866 for (i=0;i<pcbddc->benign_n;i++) { 2867 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2868 nnz[i] = n - nnz[i]; 2869 } 2870 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2871 ierr = PetscFree(nnz);CHKERRQ(ierr); 2872 } 2873 2874 for (i=0;i<pcbddc->benign_n;i++) { 2875 PetscScalar *array; 2876 PetscInt *idxs,j,nz,cum; 2877 2878 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2879 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2880 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2881 for (j=0;j<nz;j++) vals[j] = 1.; 2882 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2883 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2884 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2885 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2886 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2887 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2888 cum = 0; 2889 for (j=0;j<n;j++) { 2890 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2891 vals[cum] = array[j]; 2892 idxs_ins[cum] = j; 2893 cum++; 2894 } 2895 } 2896 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2897 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2898 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2899 } 2900 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2901 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2902 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2903 } 2904 } else { /* push */ 2905 if (pcbddc->benign_change_explicit) { 2906 PetscInt i; 2907 2908 for (i=0;i<pcbddc->benign_n;i++) { 2909 PetscScalar *B0_vals; 2910 PetscInt *B0_cols,B0_ncol; 2911 2912 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2913 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2914 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2915 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2916 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2917 } 2918 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2919 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2920 } else { 2921 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2922 } 2923 } 2924 PetscFunctionReturn(0); 2925 } 2926 2927 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2928 { 2929 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2930 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2931 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2932 PetscBLASInt *B_iwork,*B_ifail; 2933 PetscScalar *work,lwork; 2934 PetscScalar *St,*S,*eigv; 2935 PetscScalar *Sarray,*Starray; 2936 PetscReal *eigs,thresh; 2937 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2938 PetscBool allocated_S_St; 2939 #if defined(PETSC_USE_COMPLEX) 2940 PetscReal *rwork; 2941 #endif 2942 PetscErrorCode ierr; 2943 2944 PetscFunctionBegin; 2945 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2946 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2947 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 2948 2949 if (pcbddc->dbg_flag) { 2950 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 2951 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 2952 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 2953 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 2954 } 2955 2956 if (pcbddc->dbg_flag) { 2957 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 2958 } 2959 2960 /* max size of subsets */ 2961 mss = 0; 2962 for (i=0;i<sub_schurs->n_subs;i++) { 2963 PetscInt subset_size; 2964 2965 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2966 mss = PetscMax(mss,subset_size); 2967 } 2968 2969 /* min/max and threshold */ 2970 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 2971 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 2972 nmax = PetscMax(nmin,nmax); 2973 allocated_S_St = PETSC_FALSE; 2974 if (nmin) { 2975 allocated_S_St = PETSC_TRUE; 2976 } 2977 2978 /* allocate lapack workspace */ 2979 cum = cum2 = 0; 2980 maxneigs = 0; 2981 for (i=0;i<sub_schurs->n_subs;i++) { 2982 PetscInt n,subset_size; 2983 2984 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 2985 n = PetscMin(subset_size,nmax); 2986 cum += subset_size; 2987 cum2 += subset_size*n; 2988 maxneigs = PetscMax(maxneigs,n); 2989 } 2990 if (mss) { 2991 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 2992 PetscBLASInt B_itype = 1; 2993 PetscBLASInt B_N = mss; 2994 PetscReal zero = 0.0; 2995 PetscReal eps = 0.0; /* dlamch? */ 2996 2997 B_lwork = -1; 2998 S = NULL; 2999 St = NULL; 3000 eigs = NULL; 3001 eigv = NULL; 3002 B_iwork = NULL; 3003 B_ifail = NULL; 3004 #if defined(PETSC_USE_COMPLEX) 3005 rwork = NULL; 3006 #endif 3007 thresh = 1.0; 3008 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3009 #if defined(PETSC_USE_COMPLEX) 3010 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3011 #else 3012 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3013 #endif 3014 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3015 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3016 } else { 3017 /* TODO */ 3018 } 3019 } else { 3020 lwork = 0; 3021 } 3022 3023 nv = 0; 3024 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3025 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3026 } 3027 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3028 if (allocated_S_St) { 3029 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3030 } 3031 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3032 #if defined(PETSC_USE_COMPLEX) 3033 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3034 #endif 3035 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3036 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3037 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3038 nv+cum,&pcbddc->adaptive_constraints_idxs, 3039 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3040 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3041 3042 maxneigs = 0; 3043 cum = cumarray = 0; 3044 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3045 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3046 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3047 const PetscInt *idxs; 3048 3049 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3050 for (cum=0;cum<nv;cum++) { 3051 pcbddc->adaptive_constraints_n[cum] = 1; 3052 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3053 pcbddc->adaptive_constraints_data[cum] = 1.0; 3054 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3055 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3056 } 3057 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3058 } 3059 3060 if (mss) { /* multilevel */ 3061 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3062 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3063 } 3064 3065 thresh = pcbddc->adaptive_threshold; 3066 for (i=0;i<sub_schurs->n_subs;i++) { 3067 const PetscInt *idxs; 3068 PetscReal upper,lower; 3069 PetscInt j,subset_size,eigs_start = 0; 3070 PetscBLASInt B_N; 3071 PetscBool same_data = PETSC_FALSE; 3072 3073 if (pcbddc->use_deluxe_scaling) { 3074 upper = PETSC_MAX_REAL; 3075 lower = thresh; 3076 } else { 3077 upper = 1./thresh; 3078 lower = 0.; 3079 } 3080 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3081 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3082 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3083 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3084 if (sub_schurs->is_hermitian) { 3085 PetscInt j,k; 3086 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3087 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3088 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3089 } 3090 for (j=0;j<subset_size;j++) { 3091 for (k=j;k<subset_size;k++) { 3092 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3093 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3094 } 3095 } 3096 } else { 3097 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3098 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3099 } 3100 } else { 3101 S = Sarray + cumarray; 3102 St = Starray + cumarray; 3103 } 3104 /* see if we can save some work */ 3105 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3106 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3107 } 3108 3109 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3110 B_neigs = 0; 3111 } else { 3112 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3113 PetscBLASInt B_itype = 1; 3114 PetscBLASInt B_IL, B_IU; 3115 PetscReal eps = -1.0; /* dlamch? */ 3116 PetscInt nmin_s; 3117 PetscBool compute_range = PETSC_FALSE; 3118 3119 if (pcbddc->dbg_flag) { 3120 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3121 } 3122 3123 compute_range = PETSC_FALSE; 3124 if (thresh > 1.+PETSC_SMALL && !same_data) { 3125 compute_range = PETSC_TRUE; 3126 } 3127 3128 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3129 if (compute_range) { 3130 3131 /* ask for eigenvalues larger than thresh */ 3132 #if defined(PETSC_USE_COMPLEX) 3133 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3134 #else 3135 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3136 #endif 3137 } else if (!same_data) { 3138 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3139 B_IL = 1; 3140 #if defined(PETSC_USE_COMPLEX) 3141 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3142 #else 3143 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3144 #endif 3145 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3146 PetscInt k; 3147 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3148 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3149 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3150 nmin = nmax; 3151 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3152 for (k=0;k<nmax;k++) { 3153 eigs[k] = 1./PETSC_SMALL; 3154 eigv[k*(subset_size+1)] = 1.0; 3155 } 3156 } 3157 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3158 if (B_ierr) { 3159 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3160 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3161 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3162 } 3163 3164 if (B_neigs > nmax) { 3165 if (pcbddc->dbg_flag) { 3166 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3167 } 3168 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3169 B_neigs = nmax; 3170 } 3171 3172 nmin_s = PetscMin(nmin,B_N); 3173 if (B_neigs < nmin_s) { 3174 PetscBLASInt B_neigs2; 3175 3176 if (pcbddc->use_deluxe_scaling) { 3177 B_IL = B_N - nmin_s + 1; 3178 B_IU = B_N - B_neigs; 3179 } else { 3180 B_IL = B_neigs + 1; 3181 B_IU = nmin_s; 3182 } 3183 if (pcbddc->dbg_flag) { 3184 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3185 } 3186 if (sub_schurs->is_hermitian) { 3187 PetscInt j,k; 3188 for (j=0;j<subset_size;j++) { 3189 for (k=j;k<subset_size;k++) { 3190 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3191 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3192 } 3193 } 3194 } else { 3195 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3196 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3197 } 3198 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3199 #if defined(PETSC_USE_COMPLEX) 3200 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3201 #else 3202 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3203 #endif 3204 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3205 B_neigs += B_neigs2; 3206 } 3207 if (B_ierr) { 3208 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3209 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3210 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3211 } 3212 if (pcbddc->dbg_flag) { 3213 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3214 for (j=0;j<B_neigs;j++) { 3215 if (eigs[j] == 0.0) { 3216 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3217 } else { 3218 if (pcbddc->use_deluxe_scaling) { 3219 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3220 } else { 3221 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3222 } 3223 } 3224 } 3225 } 3226 } else { 3227 /* TODO */ 3228 } 3229 } 3230 /* change the basis back to the original one */ 3231 if (sub_schurs->change) { 3232 Mat change,phi,phit; 3233 3234 if (pcbddc->dbg_flag > 1) { 3235 PetscInt ii; 3236 for (ii=0;ii<B_neigs;ii++) { 3237 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3238 for (j=0;j<B_N;j++) { 3239 #if defined(PETSC_USE_COMPLEX) 3240 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3241 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3243 #else 3244 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3245 #endif 3246 } 3247 } 3248 } 3249 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3250 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3251 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3252 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3253 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3254 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3255 } 3256 maxneigs = PetscMax(B_neigs,maxneigs); 3257 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3258 if (B_neigs) { 3259 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3260 3261 if (pcbddc->dbg_flag > 1) { 3262 PetscInt ii; 3263 for (ii=0;ii<B_neigs;ii++) { 3264 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3265 for (j=0;j<B_N;j++) { 3266 #if defined(PETSC_USE_COMPLEX) 3267 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3268 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3269 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3270 #else 3271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3272 #endif 3273 } 3274 } 3275 } 3276 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3277 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3278 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3279 cum++; 3280 } 3281 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3282 /* shift for next computation */ 3283 cumarray += subset_size*subset_size; 3284 } 3285 if (pcbddc->dbg_flag) { 3286 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3287 } 3288 3289 if (mss) { 3290 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3291 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3292 /* destroy matrices (junk) */ 3293 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3294 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3295 } 3296 if (allocated_S_St) { 3297 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3298 } 3299 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3300 #if defined(PETSC_USE_COMPLEX) 3301 ierr = PetscFree(rwork);CHKERRQ(ierr); 3302 #endif 3303 if (pcbddc->dbg_flag) { 3304 PetscInt maxneigs_r; 3305 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3307 } 3308 PetscFunctionReturn(0); 3309 } 3310 3311 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3312 { 3313 PetscScalar *coarse_submat_vals; 3314 PetscErrorCode ierr; 3315 3316 PetscFunctionBegin; 3317 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3318 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3319 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3320 3321 /* Setup local neumann solver ksp_R */ 3322 /* PCBDDCSetUpLocalScatters should be called first! */ 3323 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3324 3325 /* 3326 Setup local correction and local part of coarse basis. 3327 Gives back the dense local part of the coarse matrix in column major ordering 3328 */ 3329 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3330 3331 /* Compute total number of coarse nodes and setup coarse solver */ 3332 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3333 3334 /* free */ 3335 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3336 PetscFunctionReturn(0); 3337 } 3338 3339 PetscErrorCode PCBDDCResetCustomization(PC pc) 3340 { 3341 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3342 PetscErrorCode ierr; 3343 3344 PetscFunctionBegin; 3345 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3346 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3347 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3348 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3349 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3350 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3351 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3352 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3353 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3354 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3355 PetscFunctionReturn(0); 3356 } 3357 3358 PetscErrorCode PCBDDCResetTopography(PC pc) 3359 { 3360 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3361 PetscInt i; 3362 PetscErrorCode ierr; 3363 3364 PetscFunctionBegin; 3365 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3366 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3367 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3368 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3369 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3370 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3371 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3372 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3373 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3374 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3375 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3376 for (i=0;i<pcbddc->n_local_subs;i++) { 3377 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3378 } 3379 pcbddc->n_local_subs = 0; 3380 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3381 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3382 pcbddc->graphanalyzed = PETSC_FALSE; 3383 pcbddc->recompute_topography = PETSC_TRUE; 3384 PetscFunctionReturn(0); 3385 } 3386 3387 PetscErrorCode PCBDDCResetSolvers(PC pc) 3388 { 3389 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3390 PetscErrorCode ierr; 3391 3392 PetscFunctionBegin; 3393 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3394 if (pcbddc->coarse_phi_B) { 3395 PetscScalar *array; 3396 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3397 ierr = PetscFree(array);CHKERRQ(ierr); 3398 } 3399 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3400 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3401 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3402 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3403 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3404 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3405 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3406 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3407 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3408 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3409 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3410 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3411 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3412 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3413 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3414 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3415 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3416 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3417 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3418 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3419 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3420 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3421 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3422 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3423 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3424 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3425 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3426 if (pcbddc->benign_zerodiag_subs) { 3427 PetscInt i; 3428 for (i=0;i<pcbddc->benign_n;i++) { 3429 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3430 } 3431 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3432 } 3433 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3434 PetscFunctionReturn(0); 3435 } 3436 3437 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3438 { 3439 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3440 PC_IS *pcis = (PC_IS*)pc->data; 3441 VecType impVecType; 3442 PetscInt n_constraints,n_R,old_size; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3447 n_R = pcis->n - pcbddc->n_vertices; 3448 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3449 /* local work vectors (try to avoid unneeded work)*/ 3450 /* R nodes */ 3451 old_size = -1; 3452 if (pcbddc->vec1_R) { 3453 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3454 } 3455 if (n_R != old_size) { 3456 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3457 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3458 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3459 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3460 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3461 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3462 } 3463 /* local primal dofs */ 3464 old_size = -1; 3465 if (pcbddc->vec1_P) { 3466 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3467 } 3468 if (pcbddc->local_primal_size != old_size) { 3469 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3470 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3471 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3472 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3473 } 3474 /* local explicit constraints */ 3475 old_size = -1; 3476 if (pcbddc->vec1_C) { 3477 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3478 } 3479 if (n_constraints && n_constraints != old_size) { 3480 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3481 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3482 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3483 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3484 } 3485 PetscFunctionReturn(0); 3486 } 3487 3488 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3489 { 3490 PetscErrorCode ierr; 3491 /* pointers to pcis and pcbddc */ 3492 PC_IS* pcis = (PC_IS*)pc->data; 3493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3494 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3495 /* submatrices of local problem */ 3496 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3497 /* submatrices of local coarse problem */ 3498 Mat S_VV,S_CV,S_VC,S_CC; 3499 /* working matrices */ 3500 Mat C_CR; 3501 /* additional working stuff */ 3502 PC pc_R; 3503 Mat F; 3504 Vec dummy_vec; 3505 PetscBool isLU,isCHOL,isILU,need_benign_correction; 3506 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3507 PetscScalar *work; 3508 PetscInt *idx_V_B; 3509 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3510 PetscInt i,n_R,n_D,n_B; 3511 3512 /* some shortcuts to scalars */ 3513 PetscScalar one=1.0,m_one=-1.0; 3514 3515 PetscFunctionBegin; 3516 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3517 3518 /* Set Non-overlapping dimensions */ 3519 n_vertices = pcbddc->n_vertices; 3520 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3521 n_B = pcis->n_B; 3522 n_D = pcis->n - n_B; 3523 n_R = pcis->n - n_vertices; 3524 3525 /* vertices in boundary numbering */ 3526 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3527 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3528 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3529 3530 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3531 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3532 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3533 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3534 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3535 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3536 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3537 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3538 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3539 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3540 3541 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3542 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3543 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3544 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3545 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3546 lda_rhs = n_R; 3547 need_benign_correction = PETSC_FALSE; 3548 if (isLU || isILU || isCHOL) { 3549 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3550 } else if (sub_schurs && sub_schurs->reuse_solver) { 3551 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3552 MatFactorType type; 3553 3554 F = reuse_solver->F; 3555 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3556 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3557 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3558 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3559 } else { 3560 F = NULL; 3561 } 3562 3563 /* allocate workspace */ 3564 n = 0; 3565 if (n_constraints) { 3566 n += lda_rhs*n_constraints; 3567 } 3568 if (n_vertices) { 3569 n = PetscMax(2*lda_rhs*n_vertices,n); 3570 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3571 } 3572 if (!pcbddc->symmetric_primal) { 3573 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3574 } 3575 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3576 3577 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3578 dummy_vec = NULL; 3579 if (need_benign_correction && lda_rhs != n_R && F) { 3580 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3581 } 3582 3583 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3584 if (n_constraints) { 3585 Mat M1,M2,M3,C_B; 3586 IS is_aux; 3587 PetscScalar *array,*array2; 3588 3589 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3590 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3591 3592 /* Extract constraints on R nodes: C_{CR} */ 3593 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3594 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3595 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3596 3597 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3598 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3599 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3600 for (i=0;i<n_constraints;i++) { 3601 const PetscScalar *row_cmat_values; 3602 const PetscInt *row_cmat_indices; 3603 PetscInt size_of_constraint,j; 3604 3605 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3606 for (j=0;j<size_of_constraint;j++) { 3607 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3608 } 3609 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3610 } 3611 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3612 if (F) { 3613 Mat B; 3614 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 3616 if (need_benign_correction) { 3617 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3618 3619 /* rhs is already zero on interior dofs, no need to change the rhs */ 3620 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3621 } 3622 ierr = MatMatSolve(F,B,local_auxmat2_R);CHKERRQ(ierr); 3623 if (need_benign_correction) { 3624 PetscScalar *marr; 3625 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3626 3627 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3628 if (lda_rhs != n_R) { 3629 for (i=0;i<n_constraints;i++) { 3630 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3631 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3632 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3633 } 3634 } else { 3635 for (i=0;i<n_constraints;i++) { 3636 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3637 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3638 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3639 } 3640 } 3641 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3642 } 3643 ierr = MatDestroy(&B);CHKERRQ(ierr); 3644 } else { 3645 PetscScalar *marr; 3646 3647 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3648 for (i=0;i<n_constraints;i++) { 3649 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3650 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3651 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3652 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3653 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3654 } 3655 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3656 } 3657 if (!pcbddc->switch_static) { 3658 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3659 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3660 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3661 for (i=0;i<n_constraints;i++) { 3662 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3663 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3664 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3665 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3666 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3667 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3668 } 3669 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3670 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3671 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3672 } else { 3673 if (lda_rhs != n_R) { 3674 IS dummy; 3675 3676 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3677 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3678 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3679 } else { 3680 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3681 pcbddc->local_auxmat2 = local_auxmat2_R; 3682 } 3683 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3684 } 3685 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3686 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3687 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3688 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3689 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3690 if (isCHOL) { 3691 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3692 } else { 3693 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3694 } 3695 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3696 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3697 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3698 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3699 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3700 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3701 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3702 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3703 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3704 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3705 } 3706 3707 /* Get submatrices from subdomain matrix */ 3708 if (n_vertices) { 3709 IS is_aux; 3710 3711 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3712 IS tis; 3713 3714 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3715 ierr = ISSort(tis);CHKERRQ(ierr); 3716 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3717 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3718 } else { 3719 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3720 } 3721 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3722 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3723 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3724 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3725 } 3726 3727 /* Matrix of coarse basis functions (local) */ 3728 if (pcbddc->coarse_phi_B) { 3729 PetscInt on_B,on_primal,on_D=n_D; 3730 if (pcbddc->coarse_phi_D) { 3731 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3732 } 3733 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3734 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3735 PetscScalar *marray; 3736 3737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3738 ierr = PetscFree(marray);CHKERRQ(ierr); 3739 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3740 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3741 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3742 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3743 } 3744 } 3745 3746 if (!pcbddc->coarse_phi_B) { 3747 PetscScalar *marr; 3748 3749 /* memory size */ 3750 n = n_B*pcbddc->local_primal_size; 3751 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3752 if (!pcbddc->symmetric_primal) n *= 2; 3753 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3754 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3755 marr += n_B*pcbddc->local_primal_size; 3756 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3757 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3758 marr += n_D*pcbddc->local_primal_size; 3759 } 3760 if (!pcbddc->symmetric_primal) { 3761 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3762 marr += n_B*pcbddc->local_primal_size; 3763 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3764 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3765 } 3766 } else { 3767 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3768 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3769 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3770 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3771 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3772 } 3773 } 3774 } 3775 3776 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3777 p0_lidx_I = NULL; 3778 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3779 const PetscInt *idxs; 3780 3781 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3782 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3783 for (i=0;i<pcbddc->benign_n;i++) { 3784 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3785 } 3786 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3787 } 3788 3789 /* vertices */ 3790 if (n_vertices) { 3791 3792 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3793 3794 if (n_R) { 3795 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3796 PetscBLASInt B_N,B_one = 1; 3797 PetscScalar *x,*y; 3798 PetscBool isseqaij; 3799 3800 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3801 if (need_benign_correction) { 3802 ISLocalToGlobalMapping RtoN; 3803 IS is_p0; 3804 PetscInt *idxs_p0,n; 3805 3806 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3807 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3808 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3809 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3810 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3811 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3812 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3813 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3814 } 3815 3816 if (lda_rhs == n_R) { 3817 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3818 } else { 3819 PetscScalar *av,*array; 3820 const PetscInt *xadj,*adjncy; 3821 PetscInt n; 3822 PetscBool flg_row; 3823 3824 array = work+lda_rhs*n_vertices; 3825 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3826 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3827 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3828 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3829 for (i=0;i<n;i++) { 3830 PetscInt j; 3831 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3832 } 3833 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3834 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3835 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3836 } 3837 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3838 if (need_benign_correction) { 3839 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3840 PetscScalar *marr; 3841 3842 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3843 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3844 3845 | 0 0 0 | (V) 3846 L = | 0 0 -1 | (P-p0) 3847 | 0 0 -1 | (p0) 3848 3849 */ 3850 for (i=0;i<reuse_solver->benign_n;i++) { 3851 const PetscScalar *vals; 3852 const PetscInt *idxs,*idxs_zero; 3853 PetscInt n,j,nz; 3854 3855 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3856 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3857 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3858 for (j=0;j<n;j++) { 3859 PetscScalar val = vals[j]; 3860 PetscInt k,col = idxs[j]; 3861 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3862 } 3863 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3864 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3865 } 3866 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3867 } 3868 if (F) { 3869 /* need to correct the rhs */ 3870 if (need_benign_correction) { 3871 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3872 PetscScalar *marr; 3873 3874 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3875 if (lda_rhs != n_R) { 3876 for (i=0;i<n_vertices;i++) { 3877 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3878 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3879 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3880 } 3881 } else { 3882 for (i=0;i<n_vertices;i++) { 3883 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3884 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3885 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3886 } 3887 } 3888 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3889 } 3890 ierr = MatMatSolve(F,A_RV,A_RRmA_RV);CHKERRQ(ierr); 3891 /* need to correct the solution */ 3892 if (need_benign_correction) { 3893 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3894 PetscScalar *marr; 3895 3896 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3897 if (lda_rhs != n_R) { 3898 for (i=0;i<n_vertices;i++) { 3899 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3900 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3901 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3902 } 3903 } else { 3904 for (i=0;i<n_vertices;i++) { 3905 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3906 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3907 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3908 } 3909 } 3910 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 3911 } 3912 } else { 3913 ierr = MatDenseGetArray(A_RV,&y);CHKERRQ(ierr); 3914 for (i=0;i<n_vertices;i++) { 3915 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 3916 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 3917 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3918 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3919 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3920 } 3921 ierr = MatDenseRestoreArray(A_RV,&y);CHKERRQ(ierr); 3922 } 3923 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3924 /* S_VV and S_CV */ 3925 if (n_constraints) { 3926 Mat B; 3927 3928 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3929 for (i=0;i<n_vertices;i++) { 3930 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3931 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 3932 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3933 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3934 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3935 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3936 } 3937 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3938 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 3939 ierr = MatDestroy(&B);CHKERRQ(ierr); 3940 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 3941 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 3942 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 3943 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 3944 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 3945 ierr = MatDestroy(&B);CHKERRQ(ierr); 3946 } 3947 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3948 if (!isseqaij) { /* MatMatMult with SEQ(S)BAIJ below will raise an error */ 3949 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3950 } 3951 if (lda_rhs != n_R) { 3952 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3953 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3954 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 3955 } 3956 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 3957 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 3958 if (need_benign_correction) { 3959 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3960 PetscScalar *marr,*sums; 3961 3962 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 3963 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 3964 for (i=0;i<reuse_solver->benign_n;i++) { 3965 const PetscScalar *vals; 3966 const PetscInt *idxs,*idxs_zero; 3967 PetscInt n,j,nz; 3968 3969 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3970 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3971 for (j=0;j<n_vertices;j++) { 3972 PetscInt k; 3973 sums[j] = 0.; 3974 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 3975 } 3976 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3977 for (j=0;j<n;j++) { 3978 PetscScalar val = vals[j]; 3979 PetscInt k; 3980 for (k=0;k<n_vertices;k++) { 3981 marr[idxs[j]+k*n_vertices] += val*sums[k]; 3982 } 3983 } 3984 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3985 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3986 } 3987 ierr = PetscFree(sums);CHKERRQ(ierr); 3988 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 3989 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 3990 } 3991 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 3992 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 3993 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 3994 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 3995 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 3996 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 3997 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 3998 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3999 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4000 } else { 4001 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4002 } 4003 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4004 4005 /* coarse basis functions */ 4006 for (i=0;i<n_vertices;i++) { 4007 PetscScalar *y; 4008 4009 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4010 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4011 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4012 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4013 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4014 y[n_B*i+idx_V_B[i]] = 1.0; 4015 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4016 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4017 4018 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4019 PetscInt j; 4020 4021 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4022 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4023 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4024 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4025 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4026 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4027 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4028 } 4029 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4030 } 4031 /* if n_R == 0 the object is not destroyed */ 4032 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4033 } 4034 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4035 4036 if (n_constraints) { 4037 Mat B; 4038 4039 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4040 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4041 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4042 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4043 if (n_vertices) { 4044 if (isCHOL) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4045 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4046 } else { 4047 Mat S_VCt; 4048 4049 if (lda_rhs != n_R) { 4050 ierr = MatDestroy(&B);CHKERRQ(ierr); 4051 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4052 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4053 } 4054 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4055 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4056 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4057 } 4058 } 4059 ierr = MatDestroy(&B);CHKERRQ(ierr); 4060 /* coarse basis functions */ 4061 for (i=0;i<n_constraints;i++) { 4062 PetscScalar *y; 4063 4064 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4065 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4066 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4067 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4068 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4069 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4070 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4071 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4072 PetscInt j; 4073 4074 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4075 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4076 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4077 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4078 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4079 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4080 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4081 } 4082 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4083 } 4084 } 4085 if (n_constraints) { 4086 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4087 } 4088 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4089 4090 /* coarse matrix entries relative to B_0 */ 4091 if (pcbddc->benign_n) { 4092 Mat B0_B,B0_BPHI; 4093 IS is_dummy; 4094 PetscScalar *data; 4095 PetscInt j; 4096 4097 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4098 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4099 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4100 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4101 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4102 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4103 for (j=0;j<pcbddc->benign_n;j++) { 4104 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4105 for (i=0;i<pcbddc->local_primal_size;i++) { 4106 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4107 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4108 } 4109 } 4110 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4111 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4112 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4113 } 4114 4115 /* compute other basis functions for non-symmetric problems */ 4116 if (!pcbddc->symmetric_primal) { 4117 Mat B_V=NULL,B_C=NULL; 4118 PetscScalar *marray; 4119 4120 if (n_constraints) { 4121 Mat S_CCT,C_CRT; 4122 4123 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4124 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4125 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4126 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4127 if (n_vertices) { 4128 Mat S_VCT; 4129 4130 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4131 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4132 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4133 } 4134 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4135 } else { 4136 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4137 } 4138 if (n_vertices && n_R) { 4139 PetscScalar *av,*marray; 4140 const PetscInt *xadj,*adjncy; 4141 PetscInt n; 4142 PetscBool flg_row; 4143 4144 /* B_V = B_V - A_VR^T */ 4145 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4146 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4147 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4148 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4149 for (i=0;i<n;i++) { 4150 PetscInt j; 4151 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4152 } 4153 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4154 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4155 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4156 } 4157 4158 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4159 if (n_vertices) { 4160 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4161 for (i=0;i<n_vertices;i++) { 4162 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4163 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4164 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4165 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4166 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4167 } 4168 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4169 } 4170 if (B_C) { 4171 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4172 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4173 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4174 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4175 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4177 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4178 } 4179 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4180 } 4181 /* coarse basis functions */ 4182 for (i=0;i<pcbddc->local_primal_size;i++) { 4183 PetscScalar *y; 4184 4185 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4186 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4187 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4188 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4189 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4190 if (i<n_vertices) { 4191 y[n_B*i+idx_V_B[i]] = 1.0; 4192 } 4193 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4194 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4195 4196 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4197 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4198 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4199 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4200 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4201 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4202 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4203 } 4204 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4205 } 4206 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4207 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4208 } 4209 4210 /* free memory */ 4211 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4212 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4213 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4214 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4215 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4216 ierr = PetscFree(work);CHKERRQ(ierr); 4217 if (n_vertices) { 4218 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4219 } 4220 if (n_constraints) { 4221 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4222 } 4223 /* Checking coarse_sub_mat and coarse basis functios */ 4224 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4225 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4226 if (pcbddc->dbg_flag) { 4227 Mat coarse_sub_mat; 4228 Mat AUXMAT,TM1,TM2,TM3,TM4; 4229 Mat coarse_phi_D,coarse_phi_B; 4230 Mat coarse_psi_D,coarse_psi_B; 4231 Mat A_II,A_BB,A_IB,A_BI; 4232 Mat C_B,CPHI; 4233 IS is_dummy; 4234 Vec mones; 4235 MatType checkmattype=MATSEQAIJ; 4236 PetscReal real_value; 4237 4238 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4239 Mat A; 4240 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4241 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4242 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4243 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4244 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4245 ierr = MatDestroy(&A);CHKERRQ(ierr); 4246 } else { 4247 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4248 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4249 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4250 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4251 } 4252 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4253 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4254 if (!pcbddc->symmetric_primal) { 4255 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4256 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4257 } 4258 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4259 4260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4261 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4262 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4263 if (!pcbddc->symmetric_primal) { 4264 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4265 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4266 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4267 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4268 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4269 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4270 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4271 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4272 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4273 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4274 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4275 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4276 } else { 4277 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4278 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4279 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4280 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4281 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4282 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4283 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4284 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4285 } 4286 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4287 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4288 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4289 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4290 if (pcbddc->benign_n) { 4291 Mat B0_B,B0_BPHI; 4292 PetscScalar *data,*data2; 4293 PetscInt j; 4294 4295 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4296 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4297 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4298 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4299 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4300 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4301 for (j=0;j<pcbddc->benign_n;j++) { 4302 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4303 for (i=0;i<pcbddc->local_primal_size;i++) { 4304 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4305 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4306 } 4307 } 4308 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4309 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4310 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4311 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4312 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4313 } 4314 #if 0 4315 { 4316 PetscViewer viewer; 4317 char filename[256]; 4318 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4319 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4320 ierr = PetscViewerSetFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4321 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4322 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4323 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4324 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4325 if (save_change) { 4326 Mat phi_B; 4327 ierr = MatMatMult(save_change,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&phi_B);CHKERRQ(ierr); 4328 ierr = PetscObjectSetName((PetscObject)phi_B,"phi_B");CHKERRQ(ierr); 4329 ierr = MatView(phi_B,viewer);CHKERRQ(ierr); 4330 ierr = MatDestroy(&phi_B);CHKERRQ(ierr); 4331 } else { 4332 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4333 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4334 } 4335 if (pcbddc->coarse_phi_D) { 4336 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4337 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4338 } 4339 if (pcbddc->coarse_psi_B) { 4340 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4341 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4342 } 4343 if (pcbddc->coarse_psi_D) { 4344 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4345 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4346 } 4347 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4348 } 4349 #endif 4350 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4351 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4352 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4353 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4354 4355 /* check constraints */ 4356 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4357 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4358 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4359 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4360 } else { 4361 PetscScalar *data; 4362 Mat tmat; 4363 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4364 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4365 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4366 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4367 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4368 } 4369 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4370 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4371 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4372 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4373 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4374 if (!pcbddc->symmetric_primal) { 4375 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4376 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4377 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4378 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4379 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4380 } 4381 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4382 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4383 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4384 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4385 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4386 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4387 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4388 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4389 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4390 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4391 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4392 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4393 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4394 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4395 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4396 if (!pcbddc->symmetric_primal) { 4397 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4398 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4399 } 4400 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4401 } 4402 /* get back data */ 4403 *coarse_submat_vals_n = coarse_submat_vals; 4404 PetscFunctionReturn(0); 4405 } 4406 4407 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4408 { 4409 Mat *work_mat; 4410 IS isrow_s,iscol_s; 4411 PetscBool rsorted,csorted; 4412 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4413 PetscErrorCode ierr; 4414 4415 PetscFunctionBegin; 4416 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4417 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4418 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4419 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4420 4421 if (!rsorted) { 4422 const PetscInt *idxs; 4423 PetscInt *idxs_sorted,i; 4424 4425 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4426 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4427 for (i=0;i<rsize;i++) { 4428 idxs_perm_r[i] = i; 4429 } 4430 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4431 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4432 for (i=0;i<rsize;i++) { 4433 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4434 } 4435 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4436 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4437 } else { 4438 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4439 isrow_s = isrow; 4440 } 4441 4442 if (!csorted) { 4443 if (isrow == iscol) { 4444 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4445 iscol_s = isrow_s; 4446 } else { 4447 const PetscInt *idxs; 4448 PetscInt *idxs_sorted,i; 4449 4450 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4451 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4452 for (i=0;i<csize;i++) { 4453 idxs_perm_c[i] = i; 4454 } 4455 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4456 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4457 for (i=0;i<csize;i++) { 4458 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4459 } 4460 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4461 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4462 } 4463 } else { 4464 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4465 iscol_s = iscol; 4466 } 4467 4468 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4469 4470 if (!rsorted || !csorted) { 4471 Mat new_mat; 4472 IS is_perm_r,is_perm_c; 4473 4474 if (!rsorted) { 4475 PetscInt *idxs_r,i; 4476 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4477 for (i=0;i<rsize;i++) { 4478 idxs_r[idxs_perm_r[i]] = i; 4479 } 4480 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4481 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4482 } else { 4483 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4484 } 4485 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4486 4487 if (!csorted) { 4488 if (isrow_s == iscol_s) { 4489 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4490 is_perm_c = is_perm_r; 4491 } else { 4492 PetscInt *idxs_c,i; 4493 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4494 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4495 for (i=0;i<csize;i++) { 4496 idxs_c[idxs_perm_c[i]] = i; 4497 } 4498 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4499 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4500 } 4501 } else { 4502 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4503 } 4504 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4505 4506 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4507 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4508 work_mat[0] = new_mat; 4509 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4510 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4511 } 4512 4513 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4514 *B = work_mat[0]; 4515 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4516 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4517 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4518 PetscFunctionReturn(0); 4519 } 4520 4521 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4522 { 4523 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4524 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4525 Mat new_mat,lA; 4526 IS is_local,is_global; 4527 PetscInt local_size; 4528 PetscBool isseqaij; 4529 PetscErrorCode ierr; 4530 4531 PetscFunctionBegin; 4532 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4533 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4534 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4535 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4536 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4537 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4538 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4539 4540 /* check */ 4541 if (pcbddc->dbg_flag) { 4542 Vec x,x_change; 4543 PetscReal error; 4544 4545 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4546 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4547 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4548 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4549 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4550 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4551 if (!pcbddc->change_interior) { 4552 const PetscScalar *x,*y,*v; 4553 PetscReal lerror = 0.; 4554 PetscInt i; 4555 4556 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4557 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4558 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4559 for (i=0;i<local_size;i++) 4560 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4561 lerror = PetscAbsScalar(x[i]-y[i]); 4562 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4563 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4564 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4565 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4566 if (error > PETSC_SMALL) { 4567 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4568 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4569 } else { 4570 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4571 } 4572 } 4573 } 4574 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4575 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4576 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4577 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4578 if (error > PETSC_SMALL) { 4579 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4580 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4581 } else { 4582 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4583 } 4584 } 4585 ierr = VecDestroy(&x);CHKERRQ(ierr); 4586 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4587 } 4588 4589 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4590 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4591 4592 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4593 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4594 if (isseqaij) { 4595 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4596 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4597 if (lA) { 4598 Mat work; 4599 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4600 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4601 ierr = MatDestroy(&work);CHKERRQ(ierr); 4602 } 4603 } else { 4604 Mat work_mat; 4605 4606 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4607 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4608 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4609 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4610 if (lA) { 4611 Mat work; 4612 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4613 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4614 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4615 ierr = MatDestroy(&work);CHKERRQ(ierr); 4616 } 4617 } 4618 if (matis->A->symmetric_set) { 4619 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4620 #if !defined(PETSC_USE_COMPLEX) 4621 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4622 #endif 4623 } 4624 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4625 PetscFunctionReturn(0); 4626 } 4627 4628 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4629 { 4630 PC_IS* pcis = (PC_IS*)(pc->data); 4631 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4632 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4633 PetscInt *idx_R_local=NULL; 4634 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4635 PetscInt vbs,bs; 4636 PetscBT bitmask=NULL; 4637 PetscErrorCode ierr; 4638 4639 PetscFunctionBegin; 4640 /* 4641 No need to setup local scatters if 4642 - primal space is unchanged 4643 AND 4644 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4645 AND 4646 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4647 */ 4648 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4649 PetscFunctionReturn(0); 4650 } 4651 /* destroy old objects */ 4652 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4653 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4654 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4655 /* Set Non-overlapping dimensions */ 4656 n_B = pcis->n_B; 4657 n_D = pcis->n - n_B; 4658 n_vertices = pcbddc->n_vertices; 4659 4660 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4661 4662 /* create auxiliary bitmask and allocate workspace */ 4663 if (!sub_schurs || !sub_schurs->reuse_solver) { 4664 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4665 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4666 for (i=0;i<n_vertices;i++) { 4667 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4668 } 4669 4670 for (i=0, n_R=0; i<pcis->n; i++) { 4671 if (!PetscBTLookup(bitmask,i)) { 4672 idx_R_local[n_R++] = i; 4673 } 4674 } 4675 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4676 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4677 4678 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4679 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4680 } 4681 4682 /* Block code */ 4683 vbs = 1; 4684 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4685 if (bs>1 && !(n_vertices%bs)) { 4686 PetscBool is_blocked = PETSC_TRUE; 4687 PetscInt *vary; 4688 if (!sub_schurs || !sub_schurs->reuse_solver) { 4689 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4690 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4691 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4692 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4693 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4694 for (i=0; i<pcis->n/bs; i++) { 4695 if (vary[i]!=0 && vary[i]!=bs) { 4696 is_blocked = PETSC_FALSE; 4697 break; 4698 } 4699 } 4700 ierr = PetscFree(vary);CHKERRQ(ierr); 4701 } else { 4702 /* Verify directly the R set */ 4703 for (i=0; i<n_R/bs; i++) { 4704 PetscInt j,node=idx_R_local[bs*i]; 4705 for (j=1; j<bs; j++) { 4706 if (node != idx_R_local[bs*i+j]-j) { 4707 is_blocked = PETSC_FALSE; 4708 break; 4709 } 4710 } 4711 } 4712 } 4713 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4714 vbs = bs; 4715 for (i=0;i<n_R/vbs;i++) { 4716 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4717 } 4718 } 4719 } 4720 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4721 if (sub_schurs && sub_schurs->reuse_solver) { 4722 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4723 4724 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4725 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4726 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4727 reuse_solver->is_R = pcbddc->is_R_local; 4728 } else { 4729 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4730 } 4731 4732 /* print some info if requested */ 4733 if (pcbddc->dbg_flag) { 4734 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4735 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4736 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4737 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4738 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4739 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4741 } 4742 4743 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4744 if (!sub_schurs || !sub_schurs->reuse_solver) { 4745 IS is_aux1,is_aux2; 4746 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4747 4748 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4749 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4750 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4751 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4752 for (i=0; i<n_D; i++) { 4753 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4754 } 4755 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4756 for (i=0, j=0; i<n_R; i++) { 4757 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4758 aux_array1[j++] = i; 4759 } 4760 } 4761 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4762 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4763 for (i=0, j=0; i<n_B; i++) { 4764 if (!PetscBTLookup(bitmask,is_indices[i])) { 4765 aux_array2[j++] = i; 4766 } 4767 } 4768 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4769 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4770 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4771 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4772 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4773 4774 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4775 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4776 for (i=0, j=0; i<n_R; i++) { 4777 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4778 aux_array1[j++] = i; 4779 } 4780 } 4781 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4782 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4783 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4784 } 4785 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4786 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4787 } else { 4788 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4789 IS tis; 4790 PetscInt schur_size; 4791 4792 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4793 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4794 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4795 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4796 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4797 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4798 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4799 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4800 } 4801 } 4802 PetscFunctionReturn(0); 4803 } 4804 4805 4806 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4807 { 4808 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4809 PC_IS *pcis = (PC_IS*)pc->data; 4810 PC pc_temp; 4811 Mat A_RR; 4812 MatReuse reuse; 4813 PetscScalar m_one = -1.0; 4814 PetscReal value; 4815 PetscInt n_D,n_R; 4816 PetscBool check_corr[2],issbaij; 4817 PetscErrorCode ierr; 4818 /* prefixes stuff */ 4819 char dir_prefix[256],neu_prefix[256],str_level[16]; 4820 size_t len; 4821 4822 PetscFunctionBegin; 4823 4824 /* compute prefixes */ 4825 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4826 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4827 if (!pcbddc->current_level) { 4828 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4829 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4830 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4831 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4832 } else { 4833 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 4834 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 4835 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4836 len -= 15; /* remove "pc_bddc_coarse_" */ 4837 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4838 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4839 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4840 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4841 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4842 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4843 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4844 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4845 } 4846 4847 /* DIRICHLET PROBLEM */ 4848 if (dirichlet) { 4849 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4850 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4851 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4852 if (pcbddc->dbg_flag) { 4853 Mat A_IIn; 4854 4855 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4856 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4857 pcis->A_II = A_IIn; 4858 } 4859 } 4860 if (pcbddc->local_mat->symmetric_set) { 4861 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4862 } 4863 /* Matrix for Dirichlet problem is pcis->A_II */ 4864 n_D = pcis->n - pcis->n_B; 4865 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4866 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4867 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4868 /* default */ 4869 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4870 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4871 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4872 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4873 if (issbaij) { 4874 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4875 } else { 4876 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4877 } 4878 /* Allow user's customization */ 4879 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 4880 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4881 } 4882 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 4883 if (sub_schurs && sub_schurs->reuse_solver) { 4884 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4885 4886 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 4887 } 4888 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4889 if (!n_D) { 4890 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4891 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4892 } 4893 /* Set Up KSP for Dirichlet problem of BDDC */ 4894 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 4895 /* set ksp_D into pcis data */ 4896 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 4897 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 4898 pcis->ksp_D = pcbddc->ksp_D; 4899 } 4900 4901 /* NEUMANN PROBLEM */ 4902 A_RR = 0; 4903 if (neumann) { 4904 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4905 PetscInt ibs,mbs; 4906 PetscBool issbaij; 4907 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4908 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 4909 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 4910 if (pcbddc->ksp_R) { /* already created ksp */ 4911 PetscInt nn_R; 4912 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 4913 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4914 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 4915 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 4916 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 4917 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4918 reuse = MAT_INITIAL_MATRIX; 4919 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 4920 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 4921 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4922 reuse = MAT_INITIAL_MATRIX; 4923 } else { /* safe to reuse the matrix */ 4924 reuse = MAT_REUSE_MATRIX; 4925 } 4926 } 4927 /* last check */ 4928 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 4929 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4930 reuse = MAT_INITIAL_MATRIX; 4931 } 4932 } else { /* first time, so we need to create the matrix */ 4933 reuse = MAT_INITIAL_MATRIX; 4934 } 4935 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 4936 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 4937 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 4938 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4939 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 4940 if (matis->A == pcbddc->local_mat) { 4941 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4942 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4943 } else { 4944 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4945 } 4946 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 4947 if (matis->A == pcbddc->local_mat) { 4948 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4949 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4950 } else { 4951 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 4952 } 4953 } 4954 /* extract A_RR */ 4955 if (sub_schurs && sub_schurs->reuse_solver) { 4956 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4957 4958 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 4959 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4960 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 4961 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 4962 } else { 4963 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 4964 } 4965 } else { 4966 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 4967 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 4968 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 4969 } 4970 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 4971 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 4972 } 4973 if (pcbddc->local_mat->symmetric_set) { 4974 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4975 } 4976 if (!pcbddc->ksp_R) { /* create object if not present */ 4977 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 4978 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 4979 /* default */ 4980 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 4981 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 4982 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4983 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4984 if (issbaij) { 4985 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4986 } else { 4987 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4988 } 4989 /* Allow user's customization */ 4990 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 4991 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 4992 } 4993 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 4994 if (!n_R) { 4995 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 4996 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 4997 } 4998 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 4999 /* Reuse solver if it is present */ 5000 if (sub_schurs && sub_schurs->reuse_solver && sub_schurs->A == pcbddc->local_mat) { 5001 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5002 5003 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5004 } 5005 /* Set Up KSP for Neumann problem of BDDC */ 5006 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5007 } 5008 5009 if (pcbddc->dbg_flag) { 5010 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5011 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5012 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5013 } 5014 5015 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5016 check_corr[0] = check_corr[1] = PETSC_FALSE; 5017 if (pcbddc->NullSpace_corr[0]) { 5018 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5019 } 5020 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5021 check_corr[0] = PETSC_TRUE; 5022 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5023 } 5024 if (neumann && pcbddc->NullSpace_corr[2]) { 5025 check_corr[1] = PETSC_TRUE; 5026 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5027 } 5028 5029 /* check Dirichlet and Neumann solvers */ 5030 if (pcbddc->dbg_flag) { 5031 if (dirichlet) { /* Dirichlet */ 5032 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5033 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5034 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5035 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5036 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5037 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5038 if (check_corr[0]) { 5039 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5040 } 5041 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5042 } 5043 if (neumann) { /* Neumann */ 5044 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5045 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5046 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5047 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5048 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5049 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5050 if (check_corr[1]) { 5051 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5052 } 5053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5054 } 5055 } 5056 /* free Neumann problem's matrix */ 5057 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5058 PetscFunctionReturn(0); 5059 } 5060 5061 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5062 { 5063 PetscErrorCode ierr; 5064 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5065 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5066 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5067 5068 PetscFunctionBegin; 5069 if (!reuse_solver) { 5070 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5071 } 5072 if (!pcbddc->switch_static) { 5073 if (applytranspose && pcbddc->local_auxmat1) { 5074 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5075 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5076 } 5077 if (!reuse_solver) { 5078 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5079 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5080 } else { 5081 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5082 5083 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5084 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5085 } 5086 } else { 5087 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5088 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5089 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5090 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5091 if (applytranspose && pcbddc->local_auxmat1) { 5092 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5093 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5094 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5095 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5096 } 5097 } 5098 if (!reuse_solver || pcbddc->switch_static) { 5099 if (applytranspose) { 5100 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5101 } else { 5102 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5103 } 5104 } else { 5105 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5106 5107 if (applytranspose) { 5108 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5109 } else { 5110 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5111 } 5112 } 5113 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5114 if (!pcbddc->switch_static) { 5115 if (!reuse_solver) { 5116 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5117 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5118 } else { 5119 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5120 5121 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5122 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5123 } 5124 if (!applytranspose && pcbddc->local_auxmat1) { 5125 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5126 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5127 } 5128 } else { 5129 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5130 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5131 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5132 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5133 if (!applytranspose && pcbddc->local_auxmat1) { 5134 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5135 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5136 } 5137 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5138 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5139 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5140 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5141 } 5142 PetscFunctionReturn(0); 5143 } 5144 5145 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5146 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5147 { 5148 PetscErrorCode ierr; 5149 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5150 PC_IS* pcis = (PC_IS*) (pc->data); 5151 const PetscScalar zero = 0.0; 5152 5153 PetscFunctionBegin; 5154 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5155 if (!pcbddc->benign_apply_coarse_only) { 5156 if (applytranspose) { 5157 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5158 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5159 } else { 5160 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5161 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5162 } 5163 } else { 5164 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5165 } 5166 5167 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5168 if (pcbddc->benign_n) { 5169 PetscScalar *array; 5170 PetscInt j; 5171 5172 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5173 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5174 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5175 } 5176 5177 /* start communications from local primal nodes to rhs of coarse solver */ 5178 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5179 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5180 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5181 5182 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5183 if (pcbddc->coarse_ksp) { 5184 Mat coarse_mat; 5185 Vec rhs,sol; 5186 MatNullSpace nullsp; 5187 PetscBool isbddc = PETSC_FALSE; 5188 5189 if (pcbddc->benign_have_null) { 5190 PC coarse_pc; 5191 5192 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5193 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5194 /* we need to propagate to coarser levels the need for a possible benign correction */ 5195 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5196 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5197 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5198 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5199 } 5200 } 5201 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5202 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5203 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5204 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5205 if (nullsp) { 5206 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5207 } 5208 if (applytranspose) { 5209 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5210 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5211 } else { 5212 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5213 PC coarse_pc; 5214 5215 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5216 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5217 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5218 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5219 } else { 5220 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5221 } 5222 } 5223 /* we don't need the benign correction at coarser levels anymore */ 5224 if (pcbddc->benign_have_null && isbddc) { 5225 PC coarse_pc; 5226 PC_BDDC* coarsepcbddc; 5227 5228 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5229 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5230 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5231 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5232 } 5233 if (nullsp) { 5234 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5235 } 5236 } 5237 5238 /* Local solution on R nodes */ 5239 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5240 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5241 } 5242 /* communications from coarse sol to local primal nodes */ 5243 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5244 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5245 5246 /* Sum contributions from the two levels */ 5247 if (!pcbddc->benign_apply_coarse_only) { 5248 if (applytranspose) { 5249 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5250 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5251 } else { 5252 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5253 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5254 } 5255 /* store p0 */ 5256 if (pcbddc->benign_n) { 5257 PetscScalar *array; 5258 PetscInt j; 5259 5260 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5261 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5262 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5263 } 5264 } else { /* expand the coarse solution */ 5265 if (applytranspose) { 5266 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5267 } else { 5268 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5269 } 5270 } 5271 PetscFunctionReturn(0); 5272 } 5273 5274 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5275 { 5276 PetscErrorCode ierr; 5277 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5278 PetscScalar *array; 5279 Vec from,to; 5280 5281 PetscFunctionBegin; 5282 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5283 from = pcbddc->coarse_vec; 5284 to = pcbddc->vec1_P; 5285 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5286 Vec tvec; 5287 5288 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5289 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5290 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5291 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5292 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5293 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5294 } 5295 } else { /* from local to global -> put data in coarse right hand side */ 5296 from = pcbddc->vec1_P; 5297 to = pcbddc->coarse_vec; 5298 } 5299 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5300 PetscFunctionReturn(0); 5301 } 5302 5303 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5304 { 5305 PetscErrorCode ierr; 5306 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5307 PetscScalar *array; 5308 Vec from,to; 5309 5310 PetscFunctionBegin; 5311 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5312 from = pcbddc->coarse_vec; 5313 to = pcbddc->vec1_P; 5314 } else { /* from local to global -> put data in coarse right hand side */ 5315 from = pcbddc->vec1_P; 5316 to = pcbddc->coarse_vec; 5317 } 5318 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5319 if (smode == SCATTER_FORWARD) { 5320 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5321 Vec tvec; 5322 5323 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5324 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5325 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5326 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5327 } 5328 } else { 5329 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5330 ierr = VecResetArray(from);CHKERRQ(ierr); 5331 } 5332 } 5333 PetscFunctionReturn(0); 5334 } 5335 5336 /* uncomment for testing purposes */ 5337 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5338 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5339 { 5340 PetscErrorCode ierr; 5341 PC_IS* pcis = (PC_IS*)(pc->data); 5342 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5343 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5344 /* one and zero */ 5345 PetscScalar one=1.0,zero=0.0; 5346 /* space to store constraints and their local indices */ 5347 PetscScalar *constraints_data; 5348 PetscInt *constraints_idxs,*constraints_idxs_B; 5349 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5350 PetscInt *constraints_n; 5351 /* iterators */ 5352 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5353 /* BLAS integers */ 5354 PetscBLASInt lwork,lierr; 5355 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5356 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5357 /* reuse */ 5358 PetscInt olocal_primal_size,olocal_primal_size_cc; 5359 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5360 /* change of basis */ 5361 PetscBool qr_needed; 5362 PetscBT change_basis,qr_needed_idx; 5363 /* auxiliary stuff */ 5364 PetscInt *nnz,*is_indices; 5365 PetscInt ncc; 5366 /* some quantities */ 5367 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5368 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5369 5370 PetscFunctionBegin; 5371 /* Destroy Mat objects computed previously */ 5372 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5373 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5374 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5375 /* save info on constraints from previous setup (if any) */ 5376 olocal_primal_size = pcbddc->local_primal_size; 5377 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5378 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5379 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5380 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5381 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5382 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5383 5384 if (!pcbddc->adaptive_selection) { 5385 IS ISForVertices,*ISForFaces,*ISForEdges; 5386 MatNullSpace nearnullsp; 5387 const Vec *nearnullvecs; 5388 Vec *localnearnullsp; 5389 PetscScalar *array; 5390 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5391 PetscBool nnsp_has_cnst; 5392 /* LAPACK working arrays for SVD or POD */ 5393 PetscBool skip_lapack,boolforchange; 5394 PetscScalar *work; 5395 PetscReal *singular_vals; 5396 #if defined(PETSC_USE_COMPLEX) 5397 PetscReal *rwork; 5398 #endif 5399 #if defined(PETSC_MISSING_LAPACK_GESVD) 5400 PetscScalar *temp_basis,*correlation_mat; 5401 #else 5402 PetscBLASInt dummy_int=1; 5403 PetscScalar dummy_scalar=1.; 5404 #endif 5405 5406 /* Get index sets for faces, edges and vertices from graph */ 5407 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5408 /* print some info */ 5409 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5410 PetscInt nv; 5411 5412 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5413 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5414 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5415 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5416 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5417 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5420 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5421 } 5422 5423 /* free unneeded index sets */ 5424 if (!pcbddc->use_vertices) { 5425 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5426 } 5427 if (!pcbddc->use_edges) { 5428 for (i=0;i<n_ISForEdges;i++) { 5429 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5430 } 5431 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5432 n_ISForEdges = 0; 5433 } 5434 if (!pcbddc->use_faces) { 5435 for (i=0;i<n_ISForFaces;i++) { 5436 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5437 } 5438 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5439 n_ISForFaces = 0; 5440 } 5441 5442 /* check if near null space is attached to global mat */ 5443 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5444 if (nearnullsp) { 5445 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5446 /* remove any stored info */ 5447 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5448 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5449 /* store information for BDDC solver reuse */ 5450 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5451 pcbddc->onearnullspace = nearnullsp; 5452 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5453 for (i=0;i<nnsp_size;i++) { 5454 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5455 } 5456 } else { /* if near null space is not provided BDDC uses constants by default */ 5457 nnsp_size = 0; 5458 nnsp_has_cnst = PETSC_TRUE; 5459 } 5460 /* get max number of constraints on a single cc */ 5461 max_constraints = nnsp_size; 5462 if (nnsp_has_cnst) max_constraints++; 5463 5464 /* 5465 Evaluate maximum storage size needed by the procedure 5466 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5467 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5468 There can be multiple constraints per connected component 5469 */ 5470 n_vertices = 0; 5471 if (ISForVertices) { 5472 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5473 } 5474 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5475 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5476 5477 total_counts = n_ISForFaces+n_ISForEdges; 5478 total_counts *= max_constraints; 5479 total_counts += n_vertices; 5480 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5481 5482 total_counts = 0; 5483 max_size_of_constraint = 0; 5484 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5485 IS used_is; 5486 if (i<n_ISForEdges) { 5487 used_is = ISForEdges[i]; 5488 } else { 5489 used_is = ISForFaces[i-n_ISForEdges]; 5490 } 5491 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5492 total_counts += j; 5493 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5494 } 5495 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5496 5497 /* get local part of global near null space vectors */ 5498 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5499 for (k=0;k<nnsp_size;k++) { 5500 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5501 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5502 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5503 } 5504 5505 /* whether or not to skip lapack calls */ 5506 skip_lapack = PETSC_TRUE; 5507 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5508 5509 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5510 if (!skip_lapack) { 5511 PetscScalar temp_work; 5512 5513 #if defined(PETSC_MISSING_LAPACK_GESVD) 5514 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5515 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5516 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5517 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5518 #if defined(PETSC_USE_COMPLEX) 5519 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5520 #endif 5521 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5522 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5523 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5524 lwork = -1; 5525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5526 #if !defined(PETSC_USE_COMPLEX) 5527 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5528 #else 5529 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5530 #endif 5531 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5532 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5533 #else /* on missing GESVD */ 5534 /* SVD */ 5535 PetscInt max_n,min_n; 5536 max_n = max_size_of_constraint; 5537 min_n = max_constraints; 5538 if (max_size_of_constraint < max_constraints) { 5539 min_n = max_size_of_constraint; 5540 max_n = max_constraints; 5541 } 5542 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5543 #if defined(PETSC_USE_COMPLEX) 5544 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5545 #endif 5546 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5547 lwork = -1; 5548 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5549 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5550 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5552 #if !defined(PETSC_USE_COMPLEX) 5553 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5554 #else 5555 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5556 #endif 5557 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5559 #endif /* on missing GESVD */ 5560 /* Allocate optimal workspace */ 5561 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5562 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5563 } 5564 /* Now we can loop on constraining sets */ 5565 total_counts = 0; 5566 constraints_idxs_ptr[0] = 0; 5567 constraints_data_ptr[0] = 0; 5568 /* vertices */ 5569 if (n_vertices) { 5570 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5571 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5572 for (i=0;i<n_vertices;i++) { 5573 constraints_n[total_counts] = 1; 5574 constraints_data[total_counts] = 1.0; 5575 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5576 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5577 total_counts++; 5578 } 5579 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5580 n_vertices = total_counts; 5581 } 5582 5583 /* edges and faces */ 5584 total_counts_cc = total_counts; 5585 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5586 IS used_is; 5587 PetscBool idxs_copied = PETSC_FALSE; 5588 5589 if (ncc<n_ISForEdges) { 5590 used_is = ISForEdges[ncc]; 5591 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5592 } else { 5593 used_is = ISForFaces[ncc-n_ISForEdges]; 5594 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5595 } 5596 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5597 5598 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5599 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5600 /* change of basis should not be performed on local periodic nodes */ 5601 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5602 if (nnsp_has_cnst) { 5603 PetscScalar quad_value; 5604 5605 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5606 idxs_copied = PETSC_TRUE; 5607 5608 if (!pcbddc->use_nnsp_true) { 5609 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5610 } else { 5611 quad_value = 1.0; 5612 } 5613 for (j=0;j<size_of_constraint;j++) { 5614 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5615 } 5616 temp_constraints++; 5617 total_counts++; 5618 } 5619 for (k=0;k<nnsp_size;k++) { 5620 PetscReal real_value; 5621 PetscScalar *ptr_to_data; 5622 5623 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5624 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5625 for (j=0;j<size_of_constraint;j++) { 5626 ptr_to_data[j] = array[is_indices[j]]; 5627 } 5628 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5629 /* check if array is null on the connected component */ 5630 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5631 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5632 if (real_value > 0.0) { /* keep indices and values */ 5633 temp_constraints++; 5634 total_counts++; 5635 if (!idxs_copied) { 5636 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5637 idxs_copied = PETSC_TRUE; 5638 } 5639 } 5640 } 5641 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5642 valid_constraints = temp_constraints; 5643 if (!pcbddc->use_nnsp_true && temp_constraints) { 5644 if (temp_constraints == 1) { /* just normalize the constraint */ 5645 PetscScalar norm,*ptr_to_data; 5646 5647 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5648 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5649 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5650 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5651 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5652 } else { /* perform SVD */ 5653 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5654 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5655 5656 #if defined(PETSC_MISSING_LAPACK_GESVD) 5657 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5658 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5659 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5660 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5661 from that computed using LAPACKgesvd 5662 -> This is due to a different computation of eigenvectors in LAPACKheev 5663 -> The quality of the POD-computed basis will be the same */ 5664 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5665 /* Store upper triangular part of correlation matrix */ 5666 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5667 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5668 for (j=0;j<temp_constraints;j++) { 5669 for (k=0;k<j+1;k++) { 5670 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5671 } 5672 } 5673 /* compute eigenvalues and eigenvectors of correlation matrix */ 5674 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5675 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5676 #if !defined(PETSC_USE_COMPLEX) 5677 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5678 #else 5679 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5680 #endif 5681 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5682 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5683 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5684 j = 0; 5685 while (j < temp_constraints && singular_vals[j] < tol) j++; 5686 total_counts = total_counts-j; 5687 valid_constraints = temp_constraints-j; 5688 /* scale and copy POD basis into used quadrature memory */ 5689 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5690 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5691 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5692 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5693 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5694 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5695 if (j<temp_constraints) { 5696 PetscInt ii; 5697 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5698 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5699 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5700 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5701 for (k=0;k<temp_constraints-j;k++) { 5702 for (ii=0;ii<size_of_constraint;ii++) { 5703 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5704 } 5705 } 5706 } 5707 #else /* on missing GESVD */ 5708 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5709 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5710 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5711 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5712 #if !defined(PETSC_USE_COMPLEX) 5713 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5714 #else 5715 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5716 #endif 5717 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5719 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5720 k = temp_constraints; 5721 if (k > size_of_constraint) k = size_of_constraint; 5722 j = 0; 5723 while (j < k && singular_vals[k-j-1] < tol) j++; 5724 valid_constraints = k-j; 5725 total_counts = total_counts-temp_constraints+valid_constraints; 5726 #endif /* on missing GESVD */ 5727 } 5728 } 5729 /* update pointers information */ 5730 if (valid_constraints) { 5731 constraints_n[total_counts_cc] = valid_constraints; 5732 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5733 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5734 /* set change_of_basis flag */ 5735 if (boolforchange) { 5736 PetscBTSet(change_basis,total_counts_cc); 5737 } 5738 total_counts_cc++; 5739 } 5740 } 5741 /* free workspace */ 5742 if (!skip_lapack) { 5743 ierr = PetscFree(work);CHKERRQ(ierr); 5744 #if defined(PETSC_USE_COMPLEX) 5745 ierr = PetscFree(rwork);CHKERRQ(ierr); 5746 #endif 5747 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5748 #if defined(PETSC_MISSING_LAPACK_GESVD) 5749 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5750 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5751 #endif 5752 } 5753 for (k=0;k<nnsp_size;k++) { 5754 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5755 } 5756 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5757 /* free index sets of faces, edges and vertices */ 5758 for (i=0;i<n_ISForFaces;i++) { 5759 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5760 } 5761 if (n_ISForFaces) { 5762 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5763 } 5764 for (i=0;i<n_ISForEdges;i++) { 5765 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5766 } 5767 if (n_ISForEdges) { 5768 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5769 } 5770 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5771 } else { 5772 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5773 5774 total_counts = 0; 5775 n_vertices = 0; 5776 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5777 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5778 } 5779 max_constraints = 0; 5780 total_counts_cc = 0; 5781 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5782 total_counts += pcbddc->adaptive_constraints_n[i]; 5783 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5784 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5785 } 5786 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5787 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5788 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5789 constraints_data = pcbddc->adaptive_constraints_data; 5790 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5791 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5792 total_counts_cc = 0; 5793 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5794 if (pcbddc->adaptive_constraints_n[i]) { 5795 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5796 } 5797 } 5798 #if 0 5799 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5800 for (i=0;i<total_counts_cc;i++) { 5801 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5802 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5803 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5804 printf(" %d",constraints_idxs[j]); 5805 } 5806 printf("\n"); 5807 printf("number of cc: %d\n",constraints_n[i]); 5808 } 5809 for (i=0;i<n_vertices;i++) { 5810 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5811 } 5812 for (i=0;i<sub_schurs->n_subs;i++) { 5813 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5814 } 5815 #endif 5816 5817 max_size_of_constraint = 0; 5818 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5819 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5820 /* Change of basis */ 5821 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5822 if (pcbddc->use_change_of_basis) { 5823 for (i=0;i<sub_schurs->n_subs;i++) { 5824 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5825 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5826 } 5827 } 5828 } 5829 } 5830 pcbddc->local_primal_size = total_counts; 5831 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5832 5833 /* map constraints_idxs in boundary numbering */ 5834 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5835 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5836 5837 /* Create constraint matrix */ 5838 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5839 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5840 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5841 5842 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5843 /* determine if a QR strategy is needed for change of basis */ 5844 qr_needed = PETSC_FALSE; 5845 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5846 total_primal_vertices=0; 5847 pcbddc->local_primal_size_cc = 0; 5848 for (i=0;i<total_counts_cc;i++) { 5849 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5850 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5851 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5852 pcbddc->local_primal_size_cc += 1; 5853 } else if (PetscBTLookup(change_basis,i)) { 5854 for (k=0;k<constraints_n[i];k++) { 5855 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5856 } 5857 pcbddc->local_primal_size_cc += constraints_n[i]; 5858 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5859 PetscBTSet(qr_needed_idx,i); 5860 qr_needed = PETSC_TRUE; 5861 } 5862 } else { 5863 pcbddc->local_primal_size_cc += 1; 5864 } 5865 } 5866 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5867 pcbddc->n_vertices = total_primal_vertices; 5868 /* permute indices in order to have a sorted set of vertices */ 5869 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5870 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5871 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5872 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 5873 5874 /* nonzero structure of constraint matrix */ 5875 /* and get reference dof for local constraints */ 5876 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 5877 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 5878 5879 j = total_primal_vertices; 5880 total_counts = total_primal_vertices; 5881 cum = total_primal_vertices; 5882 for (i=n_vertices;i<total_counts_cc;i++) { 5883 if (!PetscBTLookup(change_basis,i)) { 5884 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 5885 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 5886 cum++; 5887 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5888 for (k=0;k<constraints_n[i];k++) { 5889 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5890 nnz[j+k] = size_of_constraint; 5891 } 5892 j += constraints_n[i]; 5893 } 5894 } 5895 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 5896 ierr = PetscFree(nnz);CHKERRQ(ierr); 5897 5898 /* set values in constraint matrix */ 5899 for (i=0;i<total_primal_vertices;i++) { 5900 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 5901 } 5902 total_counts = total_primal_vertices; 5903 for (i=n_vertices;i<total_counts_cc;i++) { 5904 if (!PetscBTLookup(change_basis,i)) { 5905 PetscInt *cols; 5906 5907 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5908 cols = constraints_idxs+constraints_idxs_ptr[i]; 5909 for (k=0;k<constraints_n[i];k++) { 5910 PetscInt row = total_counts+k; 5911 PetscScalar *vals; 5912 5913 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 5914 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 5915 } 5916 total_counts += constraints_n[i]; 5917 } 5918 } 5919 /* assembling */ 5920 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5921 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 5922 5923 /* 5924 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 5925 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 5926 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 5927 */ 5928 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 5929 if (pcbddc->use_change_of_basis) { 5930 /* dual and primal dofs on a single cc */ 5931 PetscInt dual_dofs,primal_dofs; 5932 /* working stuff for GEQRF */ 5933 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 5934 PetscBLASInt lqr_work; 5935 /* working stuff for UNGQR */ 5936 PetscScalar *gqr_work,lgqr_work_t; 5937 PetscBLASInt lgqr_work; 5938 /* working stuff for TRTRS */ 5939 PetscScalar *trs_rhs; 5940 PetscBLASInt Blas_NRHS; 5941 /* pointers for values insertion into change of basis matrix */ 5942 PetscInt *start_rows,*start_cols; 5943 PetscScalar *start_vals; 5944 /* working stuff for values insertion */ 5945 PetscBT is_primal; 5946 PetscInt *aux_primal_numbering_B; 5947 /* matrix sizes */ 5948 PetscInt global_size,local_size; 5949 /* temporary change of basis */ 5950 Mat localChangeOfBasisMatrix; 5951 /* extra space for debugging */ 5952 PetscScalar *dbg_work; 5953 5954 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 5955 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 5956 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 5957 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 5958 /* nonzeros for local mat */ 5959 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 5960 if (!pcbddc->benign_change || pcbddc->fake_change) { 5961 for (i=0;i<pcis->n;i++) nnz[i]=1; 5962 } else { 5963 const PetscInt *ii; 5964 PetscInt n; 5965 PetscBool flg_row; 5966 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5967 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 5968 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 5969 } 5970 for (i=n_vertices;i<total_counts_cc;i++) { 5971 if (PetscBTLookup(change_basis,i)) { 5972 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5973 if (PetscBTLookup(qr_needed_idx,i)) { 5974 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 5975 } else { 5976 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 5977 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 5978 } 5979 } 5980 } 5981 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 5982 ierr = PetscFree(nnz);CHKERRQ(ierr); 5983 /* Set interior change in the matrix */ 5984 if (!pcbddc->benign_change || pcbddc->fake_change) { 5985 for (i=0;i<pcis->n;i++) { 5986 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 5987 } 5988 } else { 5989 const PetscInt *ii,*jj; 5990 PetscScalar *aa; 5991 PetscInt n; 5992 PetscBool flg_row; 5993 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 5994 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5995 for (i=0;i<n;i++) { 5996 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 5997 } 5998 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 5999 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6000 } 6001 6002 if (pcbddc->dbg_flag) { 6003 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6004 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6005 } 6006 6007 6008 /* Now we loop on the constraints which need a change of basis */ 6009 /* 6010 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6011 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6012 6013 Basic blocks of change of basis matrix T computed by 6014 6015 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6016 6017 | 1 0 ... 0 s_1/S | 6018 | 0 1 ... 0 s_2/S | 6019 | ... | 6020 | 0 ... 1 s_{n-1}/S | 6021 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6022 6023 with S = \sum_{i=1}^n s_i^2 6024 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6025 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6026 6027 - QR decomposition of constraints otherwise 6028 */ 6029 if (qr_needed) { 6030 /* space to store Q */ 6031 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6032 /* array to store scaling factors for reflectors */ 6033 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6034 /* first we issue queries for optimal work */ 6035 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6036 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6037 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6038 lqr_work = -1; 6039 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6040 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6041 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6042 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6043 lgqr_work = -1; 6044 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6045 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6046 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6047 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6048 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6049 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6050 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to UNGQR Lapack routine %d",(int)lierr); 6051 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6052 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6053 /* array to store rhs and solution of triangular solver */ 6054 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6055 /* allocating workspace for check */ 6056 if (pcbddc->dbg_flag) { 6057 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6058 } 6059 } 6060 /* array to store whether a node is primal or not */ 6061 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6062 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6063 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6064 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6065 for (i=0;i<total_primal_vertices;i++) { 6066 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6067 } 6068 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6069 6070 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6071 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6072 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6073 if (PetscBTLookup(change_basis,total_counts)) { 6074 /* get constraint info */ 6075 primal_dofs = constraints_n[total_counts]; 6076 dual_dofs = size_of_constraint-primal_dofs; 6077 6078 if (pcbddc->dbg_flag) { 6079 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6080 } 6081 6082 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6083 6084 /* copy quadrature constraints for change of basis check */ 6085 if (pcbddc->dbg_flag) { 6086 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6087 } 6088 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6089 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6090 6091 /* compute QR decomposition of constraints */ 6092 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6093 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6094 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6095 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6096 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6097 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6098 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6099 6100 /* explictly compute R^-T */ 6101 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6102 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6103 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6104 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6106 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6107 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6108 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6109 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6110 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6111 6112 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6113 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6115 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6116 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6117 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6118 PetscStackCallBLAS("LAPACKungqr",LAPACKungqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6119 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in UNGQR Lapack routine %d",(int)lierr); 6120 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6121 6122 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6123 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6124 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6125 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6126 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6127 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6128 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6129 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6130 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6131 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6132 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6133 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6134 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6135 6136 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6137 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6138 /* insert cols for primal dofs */ 6139 for (j=0;j<primal_dofs;j++) { 6140 start_vals = &qr_basis[j*size_of_constraint]; 6141 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6142 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6143 } 6144 /* insert cols for dual dofs */ 6145 for (j=0,k=0;j<dual_dofs;k++) { 6146 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6147 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6148 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6149 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6150 j++; 6151 } 6152 } 6153 6154 /* check change of basis */ 6155 if (pcbddc->dbg_flag) { 6156 PetscInt ii,jj; 6157 PetscBool valid_qr=PETSC_TRUE; 6158 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6159 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6160 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6161 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6162 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6163 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6164 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6165 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6166 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6167 for (jj=0;jj<size_of_constraint;jj++) { 6168 for (ii=0;ii<primal_dofs;ii++) { 6169 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6170 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) valid_qr = PETSC_FALSE; 6171 } 6172 } 6173 if (!valid_qr) { 6174 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6175 for (jj=0;jj<size_of_constraint;jj++) { 6176 for (ii=0;ii<primal_dofs;ii++) { 6177 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6178 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6179 } 6180 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-1.0) > 1.e-12) { 6181 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6182 } 6183 } 6184 } 6185 } else { 6186 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6187 } 6188 } 6189 } else { /* simple transformation block */ 6190 PetscInt row,col; 6191 PetscScalar val,norm; 6192 6193 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6194 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6195 for (j=0;j<size_of_constraint;j++) { 6196 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6197 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6198 if (!PetscBTLookup(is_primal,row_B)) { 6199 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6200 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6201 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6202 } else { 6203 for (k=0;k<size_of_constraint;k++) { 6204 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6205 if (row != col) { 6206 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6207 } else { 6208 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6209 } 6210 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6211 } 6212 } 6213 } 6214 if (pcbddc->dbg_flag) { 6215 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6216 } 6217 } 6218 } else { 6219 if (pcbddc->dbg_flag) { 6220 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6221 } 6222 } 6223 } 6224 6225 /* free workspace */ 6226 if (qr_needed) { 6227 if (pcbddc->dbg_flag) { 6228 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6229 } 6230 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6231 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6232 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6233 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6234 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6235 } 6236 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6237 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6238 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6239 6240 /* assembling of global change of variable */ 6241 if (!pcbddc->fake_change) { 6242 Mat tmat; 6243 PetscInt bs; 6244 6245 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6246 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6247 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6248 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6249 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6250 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6251 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6252 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6253 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6254 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6255 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6256 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6257 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6258 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6259 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6260 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6261 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6262 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6263 6264 /* check */ 6265 if (pcbddc->dbg_flag) { 6266 PetscReal error; 6267 Vec x,x_change; 6268 6269 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6270 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6271 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6272 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6273 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6274 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6275 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6276 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6277 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6278 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6279 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6280 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6281 if (error > PETSC_SMALL) { 6282 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6283 } 6284 ierr = VecDestroy(&x);CHKERRQ(ierr); 6285 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6286 } 6287 /* adapt sub_schurs computed (if any) */ 6288 if (pcbddc->use_deluxe_scaling) { 6289 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6290 6291 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints");CHKERRQ(ierr); 6292 if (sub_schurs && sub_schurs->S_Ej_all) { 6293 Mat S_new,tmat; 6294 IS is_all_N,is_V_Sall = NULL; 6295 6296 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6297 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6298 if (pcbddc->deluxe_zerorows) { 6299 ISLocalToGlobalMapping NtoSall; 6300 IS is_V; 6301 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6302 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6303 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6304 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6305 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6306 } 6307 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6308 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6309 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6310 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6311 if (pcbddc->deluxe_zerorows) { 6312 const PetscScalar *array; 6313 const PetscInt *idxs_V,*idxs_all; 6314 PetscInt i,n_V; 6315 6316 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6317 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6318 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6319 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6320 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6321 for (i=0;i<n_V;i++) { 6322 PetscScalar val; 6323 PetscInt idx; 6324 6325 idx = idxs_V[i]; 6326 val = array[idxs_all[idxs_V[i]]]; 6327 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6328 } 6329 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6330 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6331 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6332 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6333 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6334 } 6335 sub_schurs->S_Ej_all = S_new; 6336 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6337 if (sub_schurs->sum_S_Ej_all) { 6338 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6339 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6340 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6341 if (pcbddc->deluxe_zerorows) { 6342 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6343 } 6344 sub_schurs->sum_S_Ej_all = S_new; 6345 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6346 } 6347 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6348 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6349 } 6350 /* destroy any change of basis context in sub_schurs */ 6351 if (sub_schurs && sub_schurs->change) { 6352 PetscInt i; 6353 6354 for (i=0;i<sub_schurs->n_subs;i++) { 6355 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6356 } 6357 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6358 } 6359 } 6360 if (pcbddc->switch_static) { /* need to save the local change */ 6361 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6362 } else { 6363 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6364 } 6365 /* determine if any process has changed the pressures locally */ 6366 pcbddc->change_interior = pcbddc->benign_have_null; 6367 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6368 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6369 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6370 pcbddc->use_qr_single = qr_needed; 6371 } 6372 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6373 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6374 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6375 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6376 } else { 6377 Mat benign_global = NULL; 6378 if (pcbddc->benign_have_null) { 6379 Mat tmat; 6380 6381 pcbddc->change_interior = PETSC_TRUE; 6382 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6383 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6384 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6385 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6386 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6387 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6388 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6389 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6390 if (pcbddc->benign_change) { 6391 Mat M; 6392 6393 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6394 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6395 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6396 ierr = MatDestroy(&M);CHKERRQ(ierr); 6397 } else { 6398 Mat eye; 6399 PetscScalar *array; 6400 6401 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6402 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6403 for (i=0;i<pcis->n;i++) { 6404 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6405 } 6406 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6407 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6408 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6409 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6410 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6411 } 6412 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6413 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6414 } 6415 if (pcbddc->user_ChangeOfBasisMatrix) { 6416 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6417 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6418 } else if (pcbddc->benign_have_null) { 6419 pcbddc->ChangeOfBasisMatrix = benign_global; 6420 } 6421 } 6422 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6423 IS is_global; 6424 const PetscInt *gidxs; 6425 6426 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6427 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6428 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6429 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6430 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6431 } 6432 } 6433 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6434 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6435 } 6436 6437 if (!pcbddc->fake_change) { 6438 /* add pressure dofs to set of primal nodes for numbering purposes */ 6439 for (i=0;i<pcbddc->benign_n;i++) { 6440 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6441 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6442 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6443 pcbddc->local_primal_size_cc++; 6444 pcbddc->local_primal_size++; 6445 } 6446 6447 /* check if a new primal space has been introduced (also take into account benign trick) */ 6448 pcbddc->new_primal_space_local = PETSC_TRUE; 6449 if (olocal_primal_size == pcbddc->local_primal_size) { 6450 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6451 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6452 if (!pcbddc->new_primal_space_local) { 6453 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6454 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6455 } 6456 } 6457 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6458 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6459 } 6460 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6461 6462 /* flush dbg viewer */ 6463 if (pcbddc->dbg_flag) { 6464 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6465 } 6466 6467 /* free workspace */ 6468 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6469 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6470 if (!pcbddc->adaptive_selection) { 6471 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6472 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6473 } else { 6474 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6475 pcbddc->adaptive_constraints_idxs_ptr, 6476 pcbddc->adaptive_constraints_data_ptr, 6477 pcbddc->adaptive_constraints_idxs, 6478 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6479 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6480 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6481 } 6482 PetscFunctionReturn(0); 6483 } 6484 6485 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6486 { 6487 ISLocalToGlobalMapping map; 6488 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6489 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6490 PetscInt i,N; 6491 PetscBool rcsr = PETSC_FALSE; 6492 PetscErrorCode ierr; 6493 6494 PetscFunctionBegin; 6495 if (pcbddc->recompute_topography) { 6496 pcbddc->graphanalyzed = PETSC_FALSE; 6497 /* Reset previously computed graph */ 6498 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6499 /* Init local Graph struct */ 6500 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6501 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6502 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6503 6504 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6505 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6506 } 6507 /* Check validity of the csr graph passed in by the user */ 6508 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6509 6510 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6511 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6512 PetscInt *xadj,*adjncy; 6513 PetscInt nvtxs; 6514 PetscBool flg_row=PETSC_FALSE; 6515 6516 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6517 if (flg_row) { 6518 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6519 pcbddc->computed_rowadj = PETSC_TRUE; 6520 } 6521 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6522 rcsr = PETSC_TRUE; 6523 } 6524 if (pcbddc->dbg_flag) { 6525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6526 } 6527 6528 /* Setup of Graph */ 6529 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6530 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6531 6532 /* attach info on disconnected subdomains if present */ 6533 if (pcbddc->n_local_subs) { 6534 PetscInt *local_subs; 6535 6536 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6537 for (i=0;i<pcbddc->n_local_subs;i++) { 6538 const PetscInt *idxs; 6539 PetscInt nl,j; 6540 6541 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6542 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6543 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6544 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6545 } 6546 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6547 pcbddc->mat_graph->local_subs = local_subs; 6548 } 6549 } 6550 6551 if (!pcbddc->graphanalyzed) { 6552 /* Graph's connected components analysis */ 6553 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6554 pcbddc->graphanalyzed = PETSC_TRUE; 6555 } 6556 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6557 PetscFunctionReturn(0); 6558 } 6559 6560 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6561 { 6562 PetscInt i,j; 6563 PetscScalar *alphas; 6564 PetscErrorCode ierr; 6565 6566 PetscFunctionBegin; 6567 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6568 for (i=0;i<n;i++) { 6569 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6570 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6571 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6572 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6573 } 6574 ierr = PetscFree(alphas);CHKERRQ(ierr); 6575 PetscFunctionReturn(0); 6576 } 6577 6578 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6579 { 6580 Mat A; 6581 PetscInt n_neighs,*neighs,*n_shared,**shared; 6582 PetscMPIInt size,rank,color; 6583 PetscInt *xadj,*adjncy; 6584 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6585 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6586 PetscInt void_procs,*procs_candidates = NULL; 6587 PetscInt xadj_count,*count; 6588 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6589 PetscSubcomm psubcomm; 6590 MPI_Comm subcomm; 6591 PetscErrorCode ierr; 6592 6593 PetscFunctionBegin; 6594 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6595 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6596 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6597 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6598 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6599 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6600 6601 if (have_void) *have_void = PETSC_FALSE; 6602 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6603 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6604 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6605 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6606 im_active = !!n; 6607 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6608 void_procs = size - active_procs; 6609 /* get ranks of of non-active processes in mat communicator */ 6610 if (void_procs) { 6611 PetscInt ncand; 6612 6613 if (have_void) *have_void = PETSC_TRUE; 6614 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6615 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6616 for (i=0,ncand=0;i<size;i++) { 6617 if (!procs_candidates[i]) { 6618 procs_candidates[ncand++] = i; 6619 } 6620 } 6621 /* force n_subdomains to be not greater that the number of non-active processes */ 6622 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6623 } 6624 6625 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6626 number of subdomains requested 1 -> send to master or first candidate in voids */ 6627 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6628 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6629 PetscInt issize,isidx,dest; 6630 if (*n_subdomains == 1) dest = 0; 6631 else dest = rank; 6632 if (im_active) { 6633 issize = 1; 6634 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6635 isidx = procs_candidates[dest]; 6636 } else { 6637 isidx = dest; 6638 } 6639 } else { 6640 issize = 0; 6641 isidx = -1; 6642 } 6643 if (*n_subdomains != 1) *n_subdomains = active_procs; 6644 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6645 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6646 PetscFunctionReturn(0); 6647 } 6648 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6649 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6650 threshold = PetscMax(threshold,2); 6651 6652 /* Get info on mapping */ 6653 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6654 6655 /* build local CSR graph of subdomains' connectivity */ 6656 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6657 xadj[0] = 0; 6658 xadj[1] = PetscMax(n_neighs-1,0); 6659 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6660 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6661 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6662 for (i=1;i<n_neighs;i++) 6663 for (j=0;j<n_shared[i];j++) 6664 count[shared[i][j]] += 1; 6665 6666 xadj_count = 0; 6667 for (i=1;i<n_neighs;i++) { 6668 for (j=0;j<n_shared[i];j++) { 6669 if (count[shared[i][j]] < threshold) { 6670 adjncy[xadj_count] = neighs[i]; 6671 adjncy_wgt[xadj_count] = n_shared[i]; 6672 xadj_count++; 6673 break; 6674 } 6675 } 6676 } 6677 xadj[1] = xadj_count; 6678 ierr = PetscFree(count);CHKERRQ(ierr); 6679 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6680 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6681 6682 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6683 6684 /* Restrict work on active processes only */ 6685 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6686 if (void_procs) { 6687 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6688 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6689 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6690 subcomm = PetscSubcommChild(psubcomm); 6691 } else { 6692 psubcomm = NULL; 6693 subcomm = PetscObjectComm((PetscObject)mat); 6694 } 6695 6696 v_wgt = NULL; 6697 if (!color) { 6698 ierr = PetscFree(xadj);CHKERRQ(ierr); 6699 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6700 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6701 } else { 6702 Mat subdomain_adj; 6703 IS new_ranks,new_ranks_contig; 6704 MatPartitioning partitioner; 6705 PetscInt rstart=0,rend=0; 6706 PetscInt *is_indices,*oldranks; 6707 PetscMPIInt size; 6708 PetscBool aggregate; 6709 6710 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6711 if (void_procs) { 6712 PetscInt prank = rank; 6713 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6714 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6715 for (i=0;i<xadj[1];i++) { 6716 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6717 } 6718 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6719 } else { 6720 oldranks = NULL; 6721 } 6722 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6723 if (aggregate) { /* TODO: all this part could be made more efficient */ 6724 PetscInt lrows,row,ncols,*cols; 6725 PetscMPIInt nrank; 6726 PetscScalar *vals; 6727 6728 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6729 lrows = 0; 6730 if (nrank<redprocs) { 6731 lrows = size/redprocs; 6732 if (nrank<size%redprocs) lrows++; 6733 } 6734 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6735 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6736 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6737 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6738 row = nrank; 6739 ncols = xadj[1]-xadj[0]; 6740 cols = adjncy; 6741 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6742 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6743 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6744 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6745 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6746 ierr = PetscFree(xadj);CHKERRQ(ierr); 6747 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6748 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6749 ierr = PetscFree(vals);CHKERRQ(ierr); 6750 if (use_vwgt) { 6751 Vec v; 6752 const PetscScalar *array; 6753 PetscInt nl; 6754 6755 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6756 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6757 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6758 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6759 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6760 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6761 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6762 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6763 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6764 ierr = VecDestroy(&v);CHKERRQ(ierr); 6765 } 6766 } else { 6767 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6768 if (use_vwgt) { 6769 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6770 v_wgt[0] = n; 6771 } 6772 } 6773 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6774 6775 /* Partition */ 6776 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6777 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6778 if (v_wgt) { 6779 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6780 } 6781 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6782 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6783 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6784 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6785 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6786 6787 /* renumber new_ranks to avoid "holes" in new set of processors */ 6788 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6789 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6790 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6791 if (!aggregate) { 6792 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6793 #if defined(PETSC_USE_DEBUG) 6794 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6795 #endif 6796 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6797 } else if (oldranks) { 6798 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6799 } else { 6800 ranks_send_to_idx[0] = is_indices[0]; 6801 } 6802 } else { 6803 PetscInt idxs[1]; 6804 PetscMPIInt tag; 6805 MPI_Request *reqs; 6806 6807 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6808 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6809 for (i=rstart;i<rend;i++) { 6810 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6811 } 6812 ierr = MPI_Recv(idxs,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6813 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6814 ierr = PetscFree(reqs);CHKERRQ(ierr); 6815 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6816 #if defined(PETSC_USE_DEBUG) 6817 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6818 #endif 6819 ranks_send_to_idx[0] = procs_candidates[oldranks[idxs[0]]]; 6820 } else if (oldranks) { 6821 ranks_send_to_idx[0] = oldranks[idxs[0]]; 6822 } else { 6823 ranks_send_to_idx[0] = idxs[0]; 6824 } 6825 } 6826 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6827 /* clean up */ 6828 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6829 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6830 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6831 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6832 } 6833 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6834 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6835 6836 /* assemble parallel IS for sends */ 6837 i = 1; 6838 if (!color) i=0; 6839 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6840 PetscFunctionReturn(0); 6841 } 6842 6843 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6844 6845 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6846 { 6847 Mat local_mat; 6848 IS is_sends_internal; 6849 PetscInt rows,cols,new_local_rows; 6850 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6851 PetscBool ismatis,isdense,newisdense,destroy_mat; 6852 ISLocalToGlobalMapping l2gmap; 6853 PetscInt* l2gmap_indices; 6854 const PetscInt* is_indices; 6855 MatType new_local_type; 6856 /* buffers */ 6857 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6858 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6859 PetscInt *recv_buffer_idxs_local; 6860 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6861 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6862 /* MPI */ 6863 MPI_Comm comm,comm_n; 6864 PetscSubcomm subcomm; 6865 PetscMPIInt n_sends,n_recvs,commsize; 6866 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6867 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6868 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6869 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6870 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 6871 PetscErrorCode ierr; 6872 6873 PetscFunctionBegin; 6874 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6875 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6876 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6877 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 6878 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 6879 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 6880 PetscValidLogicalCollectiveBool(mat,reuse,6); 6881 PetscValidLogicalCollectiveInt(mat,nis,8); 6882 PetscValidLogicalCollectiveInt(mat,nvecs,10); 6883 if (nvecs) { 6884 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 6885 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 6886 } 6887 /* further checks */ 6888 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 6889 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 6890 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 6891 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 6892 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 6893 if (reuse && *mat_n) { 6894 PetscInt mrows,mcols,mnrows,mncols; 6895 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 6896 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 6897 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 6898 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 6899 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 6900 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 6901 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 6902 } 6903 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 6904 PetscValidLogicalCollectiveInt(mat,bs,0); 6905 6906 /* prepare IS for sending if not provided */ 6907 if (!is_sends) { 6908 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 6909 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 6910 } else { 6911 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 6912 is_sends_internal = is_sends; 6913 } 6914 6915 /* get comm */ 6916 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 6917 6918 /* compute number of sends */ 6919 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 6920 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 6921 6922 /* compute number of receives */ 6923 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 6924 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 6925 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 6926 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 6927 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 6928 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 6929 ierr = PetscFree(iflags);CHKERRQ(ierr); 6930 6931 /* restrict comm if requested */ 6932 subcomm = 0; 6933 destroy_mat = PETSC_FALSE; 6934 if (restrict_comm) { 6935 PetscMPIInt color,subcommsize; 6936 6937 color = 0; 6938 if (restrict_full) { 6939 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 6940 } else { 6941 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 6942 } 6943 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 6944 subcommsize = commsize - subcommsize; 6945 /* check if reuse has been requested */ 6946 if (reuse) { 6947 if (*mat_n) { 6948 PetscMPIInt subcommsize2; 6949 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 6950 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 6951 comm_n = PetscObjectComm((PetscObject)*mat_n); 6952 } else { 6953 comm_n = PETSC_COMM_SELF; 6954 } 6955 } else { /* MAT_INITIAL_MATRIX */ 6956 PetscMPIInt rank; 6957 6958 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 6959 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 6960 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 6961 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 6962 comm_n = PetscSubcommChild(subcomm); 6963 } 6964 /* flag to destroy *mat_n if not significative */ 6965 if (color) destroy_mat = PETSC_TRUE; 6966 } else { 6967 comm_n = comm; 6968 } 6969 6970 /* prepare send/receive buffers */ 6971 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 6972 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 6973 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 6974 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 6975 if (nis) { 6976 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 6977 } 6978 6979 /* Get data from local matrices */ 6980 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 6981 /* TODO: See below some guidelines on how to prepare the local buffers */ 6982 /* 6983 send_buffer_vals should contain the raw values of the local matrix 6984 send_buffer_idxs should contain: 6985 - MatType_PRIVATE type 6986 - PetscInt size_of_l2gmap 6987 - PetscInt global_row_indices[size_of_l2gmap] 6988 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 6989 */ 6990 else { 6991 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 6992 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 6993 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 6994 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 6995 send_buffer_idxs[1] = i; 6996 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6997 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 6998 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 6999 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7000 for (i=0;i<n_sends;i++) { 7001 ilengths_vals[is_indices[i]] = len*len; 7002 ilengths_idxs[is_indices[i]] = len+2; 7003 } 7004 } 7005 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7006 /* additional is (if any) */ 7007 if (nis) { 7008 PetscMPIInt psum; 7009 PetscInt j; 7010 for (j=0,psum=0;j<nis;j++) { 7011 PetscInt plen; 7012 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7013 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7014 psum += len+1; /* indices + lenght */ 7015 } 7016 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7017 for (j=0,psum=0;j<nis;j++) { 7018 PetscInt plen; 7019 const PetscInt *is_array_idxs; 7020 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7021 send_buffer_idxs_is[psum] = plen; 7022 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7023 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7024 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7025 psum += plen+1; /* indices + lenght */ 7026 } 7027 for (i=0;i<n_sends;i++) { 7028 ilengths_idxs_is[is_indices[i]] = psum; 7029 } 7030 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7031 } 7032 7033 buf_size_idxs = 0; 7034 buf_size_vals = 0; 7035 buf_size_idxs_is = 0; 7036 buf_size_vecs = 0; 7037 for (i=0;i<n_recvs;i++) { 7038 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7039 buf_size_vals += (PetscInt)olengths_vals[i]; 7040 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7041 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7042 } 7043 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7044 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7045 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7046 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7047 7048 /* get new tags for clean communications */ 7049 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7050 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7051 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7052 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7053 7054 /* allocate for requests */ 7055 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7056 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7057 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7058 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7059 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7060 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7061 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7062 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7063 7064 /* communications */ 7065 ptr_idxs = recv_buffer_idxs; 7066 ptr_vals = recv_buffer_vals; 7067 ptr_idxs_is = recv_buffer_idxs_is; 7068 ptr_vecs = recv_buffer_vecs; 7069 for (i=0;i<n_recvs;i++) { 7070 source_dest = onodes[i]; 7071 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7072 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7073 ptr_idxs += olengths_idxs[i]; 7074 ptr_vals += olengths_vals[i]; 7075 if (nis) { 7076 source_dest = onodes_is[i]; 7077 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7078 ptr_idxs_is += olengths_idxs_is[i]; 7079 } 7080 if (nvecs) { 7081 source_dest = onodes[i]; 7082 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7083 ptr_vecs += olengths_idxs[i]-2; 7084 } 7085 } 7086 for (i=0;i<n_sends;i++) { 7087 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7088 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7089 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7090 if (nis) { 7091 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7092 } 7093 if (nvecs) { 7094 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7095 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7096 } 7097 } 7098 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7099 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7100 7101 /* assemble new l2g map */ 7102 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7103 ptr_idxs = recv_buffer_idxs; 7104 new_local_rows = 0; 7105 for (i=0;i<n_recvs;i++) { 7106 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7107 ptr_idxs += olengths_idxs[i]; 7108 } 7109 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7110 ptr_idxs = recv_buffer_idxs; 7111 new_local_rows = 0; 7112 for (i=0;i<n_recvs;i++) { 7113 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7114 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7115 ptr_idxs += olengths_idxs[i]; 7116 } 7117 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7118 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7119 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7120 7121 /* infer new local matrix type from received local matrices type */ 7122 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7123 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7124 if (n_recvs) { 7125 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7126 ptr_idxs = recv_buffer_idxs; 7127 for (i=0;i<n_recvs;i++) { 7128 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7129 new_local_type_private = MATAIJ_PRIVATE; 7130 break; 7131 } 7132 ptr_idxs += olengths_idxs[i]; 7133 } 7134 switch (new_local_type_private) { 7135 case MATDENSE_PRIVATE: 7136 new_local_type = MATSEQAIJ; 7137 bs = 1; 7138 break; 7139 case MATAIJ_PRIVATE: 7140 new_local_type = MATSEQAIJ; 7141 bs = 1; 7142 break; 7143 case MATBAIJ_PRIVATE: 7144 new_local_type = MATSEQBAIJ; 7145 break; 7146 case MATSBAIJ_PRIVATE: 7147 new_local_type = MATSEQSBAIJ; 7148 break; 7149 default: 7150 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7151 break; 7152 } 7153 } else { /* by default, new_local_type is seqaij */ 7154 new_local_type = MATSEQAIJ; 7155 bs = 1; 7156 } 7157 7158 /* create MATIS object if needed */ 7159 if (!reuse) { 7160 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7161 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7162 } else { 7163 /* it also destroys the local matrices */ 7164 if (*mat_n) { 7165 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7166 } else { /* this is a fake object */ 7167 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7168 } 7169 } 7170 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7171 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7172 7173 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7174 7175 /* Global to local map of received indices */ 7176 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7177 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7178 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7179 7180 /* restore attributes -> type of incoming data and its size */ 7181 buf_size_idxs = 0; 7182 for (i=0;i<n_recvs;i++) { 7183 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7184 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7185 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7186 } 7187 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7188 7189 /* set preallocation */ 7190 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7191 if (!newisdense) { 7192 PetscInt *new_local_nnz=0; 7193 7194 ptr_idxs = recv_buffer_idxs_local; 7195 if (n_recvs) { 7196 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7197 } 7198 for (i=0;i<n_recvs;i++) { 7199 PetscInt j; 7200 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7201 for (j=0;j<*(ptr_idxs+1);j++) { 7202 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7203 } 7204 } else { 7205 /* TODO */ 7206 } 7207 ptr_idxs += olengths_idxs[i]; 7208 } 7209 if (new_local_nnz) { 7210 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7211 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7212 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7213 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7214 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7215 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7216 } else { 7217 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7218 } 7219 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7220 } else { 7221 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7222 } 7223 7224 /* set values */ 7225 ptr_vals = recv_buffer_vals; 7226 ptr_idxs = recv_buffer_idxs_local; 7227 for (i=0;i<n_recvs;i++) { 7228 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7229 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7230 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7231 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7232 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7233 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7234 } else { 7235 /* TODO */ 7236 } 7237 ptr_idxs += olengths_idxs[i]; 7238 ptr_vals += olengths_vals[i]; 7239 } 7240 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7241 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7242 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7243 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7244 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7245 7246 #if 0 7247 if (!restrict_comm) { /* check */ 7248 Vec lvec,rvec; 7249 PetscReal infty_error; 7250 7251 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7252 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7253 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7254 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7255 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7256 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7257 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7258 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7259 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7260 } 7261 #endif 7262 7263 /* assemble new additional is (if any) */ 7264 if (nis) { 7265 PetscInt **temp_idxs,*count_is,j,psum; 7266 7267 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7268 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7269 ptr_idxs = recv_buffer_idxs_is; 7270 psum = 0; 7271 for (i=0;i<n_recvs;i++) { 7272 for (j=0;j<nis;j++) { 7273 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7274 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7275 psum += plen; 7276 ptr_idxs += plen+1; /* shift pointer to received data */ 7277 } 7278 } 7279 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7280 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7281 for (i=1;i<nis;i++) { 7282 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7283 } 7284 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7285 ptr_idxs = recv_buffer_idxs_is; 7286 for (i=0;i<n_recvs;i++) { 7287 for (j=0;j<nis;j++) { 7288 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7289 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7290 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7291 ptr_idxs += plen+1; /* shift pointer to received data */ 7292 } 7293 } 7294 for (i=0;i<nis;i++) { 7295 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7296 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7297 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7298 } 7299 ierr = PetscFree(count_is);CHKERRQ(ierr); 7300 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7301 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7302 } 7303 /* free workspace */ 7304 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7305 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7306 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7307 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7308 if (isdense) { 7309 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7310 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7311 } else { 7312 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7313 } 7314 if (nis) { 7315 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7316 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7317 } 7318 7319 if (nvecs) { 7320 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7321 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7322 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7323 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7324 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7325 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7326 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7327 /* set values */ 7328 ptr_vals = recv_buffer_vecs; 7329 ptr_idxs = recv_buffer_idxs_local; 7330 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7331 for (i=0;i<n_recvs;i++) { 7332 PetscInt j; 7333 for (j=0;j<*(ptr_idxs+1);j++) { 7334 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7335 } 7336 ptr_idxs += olengths_idxs[i]; 7337 ptr_vals += olengths_idxs[i]-2; 7338 } 7339 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7340 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7341 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7342 } 7343 7344 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7345 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7346 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7347 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7348 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7349 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7350 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7351 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7352 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7353 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7354 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7355 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7356 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7357 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7358 ierr = PetscFree(onodes);CHKERRQ(ierr); 7359 if (nis) { 7360 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7361 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7362 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7363 } 7364 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7365 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7366 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7367 for (i=0;i<nis;i++) { 7368 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7369 } 7370 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7371 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7372 } 7373 *mat_n = NULL; 7374 } 7375 PetscFunctionReturn(0); 7376 } 7377 7378 /* temporary hack into ksp private data structure */ 7379 #include <petsc/private/kspimpl.h> 7380 7381 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7382 { 7383 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7384 PC_IS *pcis = (PC_IS*)pc->data; 7385 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7386 Mat coarsedivudotp = NULL; 7387 Mat coarseG,t_coarse_mat_is; 7388 MatNullSpace CoarseNullSpace = NULL; 7389 ISLocalToGlobalMapping coarse_islg; 7390 IS coarse_is,*isarray; 7391 PetscInt i,im_active=-1,active_procs=-1; 7392 PetscInt nis,nisdofs,nisneu,nisvert; 7393 PC pc_temp; 7394 PCType coarse_pc_type; 7395 KSPType coarse_ksp_type; 7396 PetscBool multilevel_requested,multilevel_allowed; 7397 PetscBool isredundant,isbddc,isnn,coarse_reuse; 7398 PetscInt ncoarse,nedcfield; 7399 PetscBool compute_vecs = PETSC_FALSE; 7400 PetscScalar *array; 7401 MatReuse coarse_mat_reuse; 7402 PetscBool restr, full_restr, have_void; 7403 PetscMPIInt commsize; 7404 PetscErrorCode ierr; 7405 7406 PetscFunctionBegin; 7407 /* Assign global numbering to coarse dofs */ 7408 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7409 PetscInt ocoarse_size; 7410 compute_vecs = PETSC_TRUE; 7411 7412 pcbddc->new_primal_space = PETSC_TRUE; 7413 ocoarse_size = pcbddc->coarse_size; 7414 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7415 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7416 /* see if we can avoid some work */ 7417 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7418 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7419 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7420 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7421 coarse_reuse = PETSC_FALSE; 7422 } else { /* we can safely reuse already computed coarse matrix */ 7423 coarse_reuse = PETSC_TRUE; 7424 } 7425 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7426 coarse_reuse = PETSC_FALSE; 7427 } 7428 /* reset any subassembling information */ 7429 if (!coarse_reuse || pcbddc->recompute_topography) { 7430 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7431 } 7432 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7433 coarse_reuse = PETSC_TRUE; 7434 } 7435 /* assemble coarse matrix */ 7436 if (coarse_reuse && pcbddc->coarse_ksp) { 7437 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7438 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7439 coarse_mat_reuse = MAT_REUSE_MATRIX; 7440 } else { 7441 coarse_mat = NULL; 7442 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7443 } 7444 7445 /* creates temporary l2gmap and IS for coarse indexes */ 7446 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7447 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7448 7449 /* creates temporary MATIS object for coarse matrix */ 7450 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7451 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7452 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7453 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7454 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7455 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7456 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7457 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7458 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7459 7460 /* count "active" (i.e. with positive local size) and "void" processes */ 7461 im_active = !!(pcis->n); 7462 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7463 7464 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7465 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7466 /* full_restr : just use the receivers from the subassembling pattern */ 7467 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7468 coarse_mat_is = NULL; 7469 multilevel_allowed = PETSC_FALSE; 7470 multilevel_requested = PETSC_FALSE; 7471 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7472 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7473 if (multilevel_requested) { 7474 ncoarse = active_procs/pcbddc->coarsening_ratio; 7475 restr = PETSC_FALSE; 7476 full_restr = PETSC_FALSE; 7477 } else { 7478 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7479 restr = PETSC_TRUE; 7480 full_restr = PETSC_TRUE; 7481 } 7482 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7483 ncoarse = PetscMax(1,ncoarse); 7484 if (!pcbddc->coarse_subassembling) { 7485 if (pcbddc->coarsening_ratio > 1) { 7486 if (multilevel_requested) { 7487 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7488 } else { 7489 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7490 } 7491 } else { 7492 PetscMPIInt rank; 7493 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7494 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7495 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7496 } 7497 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7498 PetscInt psum; 7499 if (pcbddc->coarse_ksp) psum = 1; 7500 else psum = 0; 7501 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7502 if (ncoarse < commsize) have_void = PETSC_TRUE; 7503 } 7504 /* determine if we can go multilevel */ 7505 if (multilevel_requested) { 7506 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7507 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7508 } 7509 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7510 7511 /* dump subassembling pattern */ 7512 if (pcbddc->dbg_flag && multilevel_allowed) { 7513 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7514 } 7515 7516 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7517 nedcfield = -1; 7518 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7519 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7520 const PetscInt *idxs; 7521 ISLocalToGlobalMapping tmap; 7522 7523 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7524 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7525 /* allocate space for temporary storage */ 7526 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7527 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7528 /* allocate for IS array */ 7529 nisdofs = pcbddc->n_ISForDofsLocal; 7530 if (pcbddc->nedclocal) { 7531 if (pcbddc->nedfield > -1) { 7532 nedcfield = pcbddc->nedfield; 7533 } else { 7534 nedcfield = 0; 7535 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7536 nisdofs = 1; 7537 } 7538 } 7539 nisneu = !!pcbddc->NeumannBoundariesLocal; 7540 nisvert = 0; /* nisvert is not used */ 7541 nis = nisdofs + nisneu + nisvert; 7542 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7543 /* dofs splitting */ 7544 for (i=0;i<nisdofs;i++) { 7545 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7546 if (nedcfield != i) { 7547 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7548 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7549 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7550 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7551 } else { 7552 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7553 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7554 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7555 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7556 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7557 } 7558 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7559 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7560 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7561 } 7562 /* neumann boundaries */ 7563 if (pcbddc->NeumannBoundariesLocal) { 7564 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7565 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7566 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7567 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7568 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7569 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7570 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7571 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7572 } 7573 /* free memory */ 7574 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7575 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7576 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7577 } else { 7578 nis = 0; 7579 nisdofs = 0; 7580 nisneu = 0; 7581 nisvert = 0; 7582 isarray = NULL; 7583 } 7584 /* destroy no longer needed map */ 7585 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7586 7587 /* subassemble */ 7588 if (multilevel_allowed) { 7589 Vec vp[1]; 7590 PetscInt nvecs = 0; 7591 PetscBool reuse,reuser; 7592 7593 if (coarse_mat) reuse = PETSC_TRUE; 7594 else reuse = PETSC_FALSE; 7595 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7596 vp[0] = NULL; 7597 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7598 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7599 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7600 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7601 nvecs = 1; 7602 7603 if (pcbddc->divudotp) { 7604 Mat B,loc_divudotp; 7605 Vec v,p; 7606 IS dummy; 7607 PetscInt np; 7608 7609 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7610 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7611 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7612 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7613 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7614 ierr = VecSet(p,1.);CHKERRQ(ierr); 7615 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7616 ierr = VecDestroy(&p);CHKERRQ(ierr); 7617 ierr = MatDestroy(&B);CHKERRQ(ierr); 7618 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7619 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7620 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7621 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7622 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7623 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7624 ierr = VecDestroy(&v);CHKERRQ(ierr); 7625 } 7626 } 7627 if (reuser) { 7628 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7629 } else { 7630 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7631 } 7632 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7633 PetscScalar *arraym,*arrayv; 7634 PetscInt nl; 7635 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7636 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7637 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7638 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7639 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7640 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7641 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7642 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7643 } else { 7644 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7645 } 7646 } else { 7647 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7648 } 7649 if (coarse_mat_is || coarse_mat) { 7650 PetscMPIInt size; 7651 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7652 if (!multilevel_allowed) { 7653 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7654 } else { 7655 Mat A; 7656 7657 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7658 if (coarse_mat_is) { 7659 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7660 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7661 coarse_mat = coarse_mat_is; 7662 } 7663 /* be sure we don't have MatSeqDENSE as local mat */ 7664 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7665 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7666 } 7667 } 7668 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7669 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7670 7671 /* create local to global scatters for coarse problem */ 7672 if (compute_vecs) { 7673 PetscInt lrows; 7674 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7675 if (coarse_mat) { 7676 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7677 } else { 7678 lrows = 0; 7679 } 7680 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7681 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7682 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7683 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7684 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7685 } 7686 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7687 7688 /* set defaults for coarse KSP and PC */ 7689 if (multilevel_allowed) { 7690 coarse_ksp_type = KSPRICHARDSON; 7691 coarse_pc_type = PCBDDC; 7692 } else { 7693 coarse_ksp_type = KSPPREONLY; 7694 coarse_pc_type = PCREDUNDANT; 7695 } 7696 7697 /* print some info if requested */ 7698 if (pcbddc->dbg_flag) { 7699 if (!multilevel_allowed) { 7700 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7701 if (multilevel_requested) { 7702 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7703 } else if (pcbddc->max_levels) { 7704 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7705 } 7706 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7707 } 7708 } 7709 7710 /* communicate coarse discrete gradient */ 7711 coarseG = NULL; 7712 if (pcbddc->nedcG && multilevel_allowed) { 7713 MPI_Comm ccomm; 7714 if (coarse_mat) { 7715 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7716 } else { 7717 ccomm = MPI_COMM_NULL; 7718 } 7719 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7720 } 7721 7722 /* create the coarse KSP object only once with defaults */ 7723 if (coarse_mat) { 7724 PetscViewer dbg_viewer = NULL; 7725 if (pcbddc->dbg_flag) { 7726 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7727 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7728 } 7729 if (!pcbddc->coarse_ksp) { 7730 char prefix[256],str_level[16]; 7731 size_t len; 7732 7733 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7734 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7735 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7736 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7737 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7738 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7739 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7740 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7741 /* TODO is this logic correct? should check for coarse_mat type */ 7742 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7743 /* prefix */ 7744 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7745 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7746 if (!pcbddc->current_level) { 7747 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7748 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7749 } else { 7750 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7751 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7752 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7753 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7754 sprintf(str_level,"l%d_",(int)(pcbddc->current_level)); 7755 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7756 } 7757 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7758 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7759 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7760 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7761 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7762 /* allow user customization */ 7763 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7764 } 7765 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7766 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7767 if (nisdofs) { 7768 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7769 for (i=0;i<nisdofs;i++) { 7770 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7771 } 7772 } 7773 if (nisneu) { 7774 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7775 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7776 } 7777 if (nisvert) { 7778 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7779 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7780 } 7781 if (coarseG) { 7782 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7783 } 7784 7785 /* get some info after set from options */ 7786 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7787 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7788 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7789 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7790 if (isbddc && !multilevel_allowed) { 7791 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7792 isbddc = PETSC_FALSE; 7793 } 7794 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7795 if (multilevel_requested && !isbddc && !isnn) { 7796 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7797 isbddc = PETSC_TRUE; 7798 isnn = PETSC_FALSE; 7799 } 7800 ierr = PCFactorSetReuseFill(pc_temp,PETSC_TRUE);CHKERRQ(ierr); 7801 if (isredundant) { 7802 KSP inner_ksp; 7803 PC inner_pc; 7804 7805 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7806 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7807 ierr = PCFactorSetReuseFill(inner_pc,PETSC_TRUE);CHKERRQ(ierr); 7808 } 7809 7810 /* parameters which miss an API */ 7811 if (isbddc) { 7812 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7813 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7814 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7815 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7816 if (pcbddc_coarse->benign_saddle_point) { 7817 Mat coarsedivudotp_is; 7818 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7819 IS row,col; 7820 const PetscInt *gidxs; 7821 PetscInt n,st,M,N; 7822 7823 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7824 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7825 st = st-n; 7826 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7827 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7828 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7829 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7830 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7831 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7832 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7833 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7834 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7835 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7836 ierr = ISDestroy(&row);CHKERRQ(ierr); 7837 ierr = ISDestroy(&col);CHKERRQ(ierr); 7838 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7839 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7840 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7841 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7842 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7843 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7844 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7845 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7846 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7847 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7848 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7849 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7850 } 7851 } 7852 7853 /* propagate symmetry info of coarse matrix */ 7854 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7855 if (pc->pmat->symmetric_set) { 7856 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7857 } 7858 if (pc->pmat->hermitian_set) { 7859 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7860 } 7861 if (pc->pmat->spd_set) { 7862 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7863 } 7864 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7865 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7866 } 7867 /* set operators */ 7868 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7869 if (pcbddc->dbg_flag) { 7870 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7871 } 7872 } 7873 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 7874 ierr = PetscFree(isarray);CHKERRQ(ierr); 7875 #if 0 7876 { 7877 PetscViewer viewer; 7878 char filename[256]; 7879 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 7880 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 7881 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 7882 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 7883 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 7884 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 7885 } 7886 #endif 7887 7888 if (pcbddc->coarse_ksp) { 7889 Vec crhs,csol; 7890 7891 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 7892 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 7893 if (!csol) { 7894 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 7895 } 7896 if (!crhs) { 7897 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 7898 } 7899 } 7900 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7901 7902 /* compute null space for coarse solver if the benign trick has been requested */ 7903 if (pcbddc->benign_null) { 7904 7905 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 7906 for (i=0;i<pcbddc->benign_n;i++) { 7907 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 7908 } 7909 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 7910 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 7911 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7912 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 7913 if (coarse_mat) { 7914 Vec nullv; 7915 PetscScalar *array,*array2; 7916 PetscInt nl; 7917 7918 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 7919 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 7920 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7921 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 7922 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 7923 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 7924 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 7925 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 7926 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 7927 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 7928 } 7929 } 7930 7931 if (pcbddc->coarse_ksp) { 7932 PetscBool ispreonly; 7933 7934 if (CoarseNullSpace) { 7935 PetscBool isnull; 7936 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 7937 if (isnull) { 7938 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 7939 } 7940 /* TODO: add local nullspaces (if any) */ 7941 } 7942 /* setup coarse ksp */ 7943 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 7944 /* Check coarse problem if in debug mode or if solving with an iterative method */ 7945 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 7946 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 7947 KSP check_ksp; 7948 KSPType check_ksp_type; 7949 PC check_pc; 7950 Vec check_vec,coarse_vec; 7951 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 7952 PetscInt its; 7953 PetscBool compute_eigs; 7954 PetscReal *eigs_r,*eigs_c; 7955 PetscInt neigs; 7956 const char *prefix; 7957 7958 /* Create ksp object suitable for estimation of extreme eigenvalues */ 7959 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 7960 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7961 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7962 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 7963 /* prevent from setup unneeded object */ 7964 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 7965 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 7966 if (ispreonly) { 7967 check_ksp_type = KSPPREONLY; 7968 compute_eigs = PETSC_FALSE; 7969 } else { 7970 check_ksp_type = KSPGMRES; 7971 compute_eigs = PETSC_TRUE; 7972 } 7973 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 7974 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 7975 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 7976 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 7977 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 7978 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 7979 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 7980 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 7981 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 7982 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 7983 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 7984 /* create random vec */ 7985 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 7986 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 7987 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 7988 /* solve coarse problem */ 7989 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 7990 /* set eigenvalue estimation if preonly has not been requested */ 7991 if (compute_eigs) { 7992 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 7993 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 7994 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 7995 if (neigs) { 7996 lambda_max = eigs_r[neigs-1]; 7997 lambda_min = eigs_r[0]; 7998 if (pcbddc->use_coarse_estimates) { 7999 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8000 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8001 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8002 } 8003 } 8004 } 8005 } 8006 8007 /* check coarse problem residual error */ 8008 if (pcbddc->dbg_flag) { 8009 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8010 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8011 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8012 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8013 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8014 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8015 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8016 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8017 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8018 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8019 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8020 if (CoarseNullSpace) { 8021 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8022 } 8023 if (compute_eigs) { 8024 PetscReal lambda_max_s,lambda_min_s; 8025 KSPConvergedReason reason; 8026 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8027 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8028 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8029 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8030 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8031 for (i=0;i<neigs;i++) { 8032 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8033 } 8034 } 8035 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8036 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8037 } 8038 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8039 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8040 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8041 if (compute_eigs) { 8042 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8043 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8044 } 8045 } 8046 } 8047 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8048 /* print additional info */ 8049 if (pcbddc->dbg_flag) { 8050 /* waits until all processes reaches this point */ 8051 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8052 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8053 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8054 } 8055 8056 /* free memory */ 8057 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8058 PetscFunctionReturn(0); 8059 } 8060 8061 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8062 { 8063 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8064 PC_IS* pcis = (PC_IS*)pc->data; 8065 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8066 IS subset,subset_mult,subset_n; 8067 PetscInt local_size,coarse_size=0; 8068 PetscInt *local_primal_indices=NULL; 8069 const PetscInt *t_local_primal_indices; 8070 PetscErrorCode ierr; 8071 8072 PetscFunctionBegin; 8073 /* Compute global number of coarse dofs */ 8074 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8075 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8076 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8077 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8078 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8079 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8080 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8081 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8082 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8083 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8084 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8085 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8086 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8087 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8088 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8089 8090 /* check numbering */ 8091 if (pcbddc->dbg_flag) { 8092 PetscScalar coarsesum,*array,*array2; 8093 PetscInt i; 8094 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8095 8096 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8097 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8098 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8099 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8100 /* counter */ 8101 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8102 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8103 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8104 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8105 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8106 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8107 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8108 for (i=0;i<pcbddc->local_primal_size;i++) { 8109 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8110 } 8111 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8112 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8113 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8114 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8115 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8116 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8117 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8118 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8119 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8120 for (i=0;i<pcis->n;i++) { 8121 if (array[i] != 0.0 && array[i] != array2[i]) { 8122 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8123 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8124 set_error = PETSC_TRUE; 8125 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8126 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8127 } 8128 } 8129 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8130 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8131 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8132 for (i=0;i<pcis->n;i++) { 8133 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8134 } 8135 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8136 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8137 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8138 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8139 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8140 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8141 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8142 PetscInt *gidxs; 8143 8144 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8145 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8146 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8147 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8148 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8149 for (i=0;i<pcbddc->local_primal_size;i++) { 8150 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8151 } 8152 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8153 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8154 } 8155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8156 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8157 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8158 } 8159 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8160 /* get back data */ 8161 *coarse_size_n = coarse_size; 8162 *local_primal_indices_n = local_primal_indices; 8163 PetscFunctionReturn(0); 8164 } 8165 8166 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8167 { 8168 IS localis_t; 8169 PetscInt i,lsize,*idxs,n; 8170 PetscScalar *vals; 8171 PetscErrorCode ierr; 8172 8173 PetscFunctionBegin; 8174 /* get indices in local ordering exploiting local to global map */ 8175 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8176 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8177 for (i=0;i<lsize;i++) vals[i] = 1.0; 8178 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8179 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8180 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8181 if (idxs) { /* multilevel guard */ 8182 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8183 } 8184 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8185 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8186 ierr = PetscFree(vals);CHKERRQ(ierr); 8187 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8188 /* now compute set in local ordering */ 8189 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8190 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8191 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8192 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8193 for (i=0,lsize=0;i<n;i++) { 8194 if (PetscRealPart(vals[i]) > 0.5) { 8195 lsize++; 8196 } 8197 } 8198 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8199 for (i=0,lsize=0;i<n;i++) { 8200 if (PetscRealPart(vals[i]) > 0.5) { 8201 idxs[lsize++] = i; 8202 } 8203 } 8204 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8205 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8206 *localis = localis_t; 8207 PetscFunctionReturn(0); 8208 } 8209 8210 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8211 { 8212 PC_IS *pcis=(PC_IS*)pc->data; 8213 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8214 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8215 Mat S_j; 8216 PetscInt *used_xadj,*used_adjncy; 8217 PetscBool free_used_adj; 8218 PetscErrorCode ierr; 8219 8220 PetscFunctionBegin; 8221 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8222 free_used_adj = PETSC_FALSE; 8223 if (pcbddc->sub_schurs_layers == -1) { 8224 used_xadj = NULL; 8225 used_adjncy = NULL; 8226 } else { 8227 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8228 used_xadj = pcbddc->mat_graph->xadj; 8229 used_adjncy = pcbddc->mat_graph->adjncy; 8230 } else if (pcbddc->computed_rowadj) { 8231 used_xadj = pcbddc->mat_graph->xadj; 8232 used_adjncy = pcbddc->mat_graph->adjncy; 8233 } else { 8234 PetscBool flg_row=PETSC_FALSE; 8235 const PetscInt *xadj,*adjncy; 8236 PetscInt nvtxs; 8237 8238 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8239 if (flg_row) { 8240 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8241 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8242 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8243 free_used_adj = PETSC_TRUE; 8244 } else { 8245 pcbddc->sub_schurs_layers = -1; 8246 used_xadj = NULL; 8247 used_adjncy = NULL; 8248 } 8249 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8250 } 8251 } 8252 8253 /* setup sub_schurs data */ 8254 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8255 if (!sub_schurs->schur_explicit) { 8256 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8257 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8258 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8259 } else { 8260 Mat change = NULL; 8261 Vec scaling = NULL; 8262 IS change_primal = NULL, iP; 8263 PetscInt benign_n; 8264 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8265 PetscBool isseqaij,need_change = PETSC_FALSE; 8266 PetscBool discrete_harmonic = PETSC_FALSE; 8267 8268 if (!pcbddc->use_vertices && reuse_solvers) { 8269 PetscInt n_vertices; 8270 8271 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8272 reuse_solvers = (PetscBool)!n_vertices; 8273 } 8274 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8275 if (!isseqaij) { 8276 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8277 if (matis->A == pcbddc->local_mat) { 8278 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8279 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8280 } else { 8281 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8282 } 8283 } 8284 if (!pcbddc->benign_change_explicit) { 8285 benign_n = pcbddc->benign_n; 8286 } else { 8287 benign_n = 0; 8288 } 8289 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8290 We need a global reduction to avoid possible deadlocks. 8291 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8292 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8293 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8294 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8295 need_change = (PetscBool)(!need_change); 8296 } 8297 /* If the user defines additional constraints, we import them here. 8298 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8299 if (need_change) { 8300 PC_IS *pcisf; 8301 PC_BDDC *pcbddcf; 8302 PC pcf; 8303 8304 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8305 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8306 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8307 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8308 8309 /* hacks */ 8310 pcisf = (PC_IS*)pcf->data; 8311 pcisf->is_B_local = pcis->is_B_local; 8312 pcisf->vec1_N = pcis->vec1_N; 8313 pcisf->BtoNmap = pcis->BtoNmap; 8314 pcisf->n = pcis->n; 8315 pcisf->n_B = pcis->n_B; 8316 pcbddcf = (PC_BDDC*)pcf->data; 8317 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8318 pcbddcf->mat_graph = pcbddc->mat_graph; 8319 pcbddcf->use_faces = PETSC_TRUE; 8320 pcbddcf->use_change_of_basis = PETSC_TRUE; 8321 pcbddcf->use_change_on_faces = PETSC_TRUE; 8322 pcbddcf->use_qr_single = PETSC_TRUE; 8323 pcbddcf->fake_change = PETSC_TRUE; 8324 8325 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8326 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8327 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8328 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8329 change = pcbddcf->ConstraintMatrix; 8330 pcbddcf->ConstraintMatrix = NULL; 8331 8332 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8333 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8334 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8335 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8336 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8337 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8338 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8339 pcf->ops->destroy = NULL; 8340 pcf->ops->reset = NULL; 8341 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8342 } 8343 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8344 8345 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8346 if (iP) { 8347 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8348 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8349 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8350 } 8351 if (discrete_harmonic) { 8352 Mat A; 8353 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8354 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8355 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8356 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8357 ierr = MatDestroy(&A);CHKERRQ(ierr); 8358 } else { 8359 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8360 } 8361 ierr = MatDestroy(&change);CHKERRQ(ierr); 8362 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8363 } 8364 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8365 8366 /* free adjacency */ 8367 if (free_used_adj) { 8368 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8369 } 8370 PetscFunctionReturn(0); 8371 } 8372 8373 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8374 { 8375 PC_IS *pcis=(PC_IS*)pc->data; 8376 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8377 PCBDDCGraph graph; 8378 PetscErrorCode ierr; 8379 8380 PetscFunctionBegin; 8381 /* attach interface graph for determining subsets */ 8382 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8383 IS verticesIS,verticescomm; 8384 PetscInt vsize,*idxs; 8385 8386 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8387 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8388 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8389 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8390 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8391 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8392 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8393 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8394 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8395 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8396 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8397 } else { 8398 graph = pcbddc->mat_graph; 8399 } 8400 /* print some info */ 8401 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8402 IS vertices; 8403 PetscInt nv,nedges,nfaces; 8404 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8405 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8406 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8407 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8409 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8411 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8412 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8413 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8414 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8415 } 8416 8417 /* sub_schurs init */ 8418 if (!pcbddc->sub_schurs) { 8419 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8420 } 8421 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8422 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8423 8424 /* free graph struct */ 8425 if (pcbddc->sub_schurs_rebuild) { 8426 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8427 } 8428 PetscFunctionReturn(0); 8429 } 8430 8431 PetscErrorCode PCBDDCCheckOperator(PC pc) 8432 { 8433 PC_IS *pcis=(PC_IS*)pc->data; 8434 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8435 PetscErrorCode ierr; 8436 8437 PetscFunctionBegin; 8438 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8439 IS zerodiag = NULL; 8440 Mat S_j,B0_B=NULL; 8441 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8442 PetscScalar *p0_check,*array,*array2; 8443 PetscReal norm; 8444 PetscInt i; 8445 8446 /* B0 and B0_B */ 8447 if (zerodiag) { 8448 IS dummy; 8449 8450 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8451 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8452 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8453 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8454 } 8455 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8456 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8457 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8458 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8459 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8460 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8461 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8462 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8463 /* S_j */ 8464 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8465 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8466 8467 /* mimic vector in \widetilde{W}_\Gamma */ 8468 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8469 /* continuous in primal space */ 8470 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8471 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8472 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8473 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8474 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8475 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8476 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8477 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8478 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8479 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8480 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8481 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8482 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8483 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8484 8485 /* assemble rhs for coarse problem */ 8486 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8487 /* local with Schur */ 8488 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8489 if (zerodiag) { 8490 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8491 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8492 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8493 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8494 } 8495 /* sum on primal nodes the local contributions */ 8496 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8497 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8498 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8499 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8500 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8501 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8502 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8503 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8504 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8505 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8506 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8507 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8508 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8509 /* scale primal nodes (BDDC sums contibutions) */ 8510 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8511 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8512 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8513 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8514 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8515 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8516 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8517 /* global: \widetilde{B0}_B w_\Gamma */ 8518 if (zerodiag) { 8519 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8520 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8521 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8522 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8523 } 8524 /* BDDC */ 8525 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8526 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8527 8528 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8529 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8530 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8531 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8532 for (i=0;i<pcbddc->benign_n;i++) { 8533 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8534 } 8535 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8536 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8537 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8538 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8539 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8540 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8541 } 8542 PetscFunctionReturn(0); 8543 } 8544 8545 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8546 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8547 { 8548 Mat At; 8549 IS rows; 8550 PetscInt rst,ren; 8551 PetscErrorCode ierr; 8552 PetscLayout rmap; 8553 8554 PetscFunctionBegin; 8555 rst = ren = 0; 8556 if (ccomm != MPI_COMM_NULL) { 8557 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8558 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8559 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8560 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8561 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8562 } 8563 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8564 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8565 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8566 8567 if (ccomm != MPI_COMM_NULL) { 8568 Mat_MPIAIJ *a,*b; 8569 IS from,to; 8570 Vec gvec; 8571 PetscInt lsize; 8572 8573 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8574 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8575 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8576 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8577 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8578 a = (Mat_MPIAIJ*)At->data; 8579 b = (Mat_MPIAIJ*)(*B)->data; 8580 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8581 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8582 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8583 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8584 b->A = a->A; 8585 b->B = a->B; 8586 8587 b->donotstash = a->donotstash; 8588 b->roworiented = a->roworiented; 8589 b->rowindices = 0; 8590 b->rowvalues = 0; 8591 b->getrowactive = PETSC_FALSE; 8592 8593 (*B)->rmap = rmap; 8594 (*B)->factortype = A->factortype; 8595 (*B)->assembled = PETSC_TRUE; 8596 (*B)->insertmode = NOT_SET_VALUES; 8597 (*B)->preallocated = PETSC_TRUE; 8598 8599 if (a->colmap) { 8600 #if defined(PETSC_USE_CTABLE) 8601 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8602 #else 8603 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8604 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8605 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8606 #endif 8607 } else b->colmap = 0; 8608 if (a->garray) { 8609 PetscInt len; 8610 len = a->B->cmap->n; 8611 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8612 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8613 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8614 } else b->garray = 0; 8615 8616 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8617 b->lvec = a->lvec; 8618 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8619 8620 /* cannot use VecScatterCopy */ 8621 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8622 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8623 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8624 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8625 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8626 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8627 ierr = ISDestroy(&from);CHKERRQ(ierr); 8628 ierr = ISDestroy(&to);CHKERRQ(ierr); 8629 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8630 } 8631 ierr = MatDestroy(&At);CHKERRQ(ierr); 8632 PetscFunctionReturn(0); 8633 } 8634