1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 IS *faces,*edges; 1518 PetscScalar *vals; 1519 const PetscScalar *array; 1520 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1526 if (graph->twodim) { 1527 lmaxneighs = 2; 1528 } else { 1529 lmaxneighs = 1; 1530 for (i=0;i<ne;i++) { 1531 const PetscInt *idxs; 1532 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1534 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1535 } 1536 lmaxneighs++; /* graph count does not include self */ 1537 } 1538 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1539 maxsize = 0; 1540 for (i=0;i<ne;i++) { 1541 PetscInt nn; 1542 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1543 maxsize = PetscMax(maxsize,nn); 1544 } 1545 for (i=0;i<nf;i++) { 1546 PetscInt nn; 1547 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1548 maxsize = PetscMax(maxsize,nn); 1549 } 1550 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1551 /* create vectors to hold quadrature weights */ 1552 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1557 } 1558 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1559 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1560 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1561 for (i=0;i<maxneighs;i++) { 1562 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1563 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1564 } 1565 1566 /* compute local quad vec */ 1567 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1568 if (!transpose) { 1569 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1572 } 1573 ierr = VecSet(p,1.);CHKERRQ(ierr); 1574 if (!transpose) { 1575 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } else { 1577 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1578 } 1579 if (vl2l) { 1580 Mat lA; 1581 VecScatter sc; 1582 1583 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1584 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1585 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1589 } else { 1590 vins = v; 1591 } 1592 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1593 ierr = VecDestroy(&p);CHKERRQ(ierr); 1594 1595 /* insert in global quadrature vecs */ 1596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1597 for (i=0;i<nf;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1608 } 1609 for (i=0;i<ne;i++) { 1610 const PetscInt *idxs; 1611 PetscInt idx,nn,j; 1612 1613 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1614 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1615 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1616 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1617 idx = -(idx+1); 1618 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1620 } 1621 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1622 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1623 if (vl2l) { 1624 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroy(&v);CHKERRQ(ierr); 1627 ierr = PetscFree(vals);CHKERRQ(ierr); 1628 1629 /* assemble near null space */ 1630 for (i=0;i<maxneighs;i++) { 1631 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1632 } 1633 for (i=0;i<maxneighs;i++) { 1634 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1635 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1636 } 1637 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1642 { 1643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1644 PetscErrorCode ierr; 1645 1646 PetscFunctionBegin; 1647 if (primalv) { 1648 if (pcbddc->user_primal_vertices_local) { 1649 IS list[2], newp; 1650 1651 list[0] = primalv; 1652 list[1] = pcbddc->user_primal_vertices_local; 1653 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1654 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1655 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1656 pcbddc->user_primal_vertices_local = newp; 1657 } else { 1658 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1659 } 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1665 { 1666 PetscErrorCode ierr; 1667 Vec local,global; 1668 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1669 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1670 PetscBool monolithic = PETSC_FALSE; 1671 1672 PetscFunctionBegin; 1673 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1674 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1676 /* need to convert from global to local topology information and remove references to information in global ordering */ 1677 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1678 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1679 if (monolithic) { /* just get block size to properly compute vertices */ 1680 if (pcbddc->vertex_size == 1) { 1681 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1682 } 1683 goto boundary; 1684 } 1685 1686 if (pcbddc->user_provided_isfordofs) { 1687 if (pcbddc->n_ISForDofs) { 1688 PetscInt i; 1689 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1690 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1692 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1693 } 1694 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1695 pcbddc->n_ISForDofs = 0; 1696 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1697 } 1698 } else { 1699 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1700 DM dm; 1701 1702 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1703 if (!dm) { 1704 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1705 } 1706 if (dm) { 1707 IS *fields; 1708 PetscInt nf,i; 1709 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1710 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1711 for (i=0;i<nf;i++) { 1712 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1713 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1714 } 1715 ierr = PetscFree(fields);CHKERRQ(ierr); 1716 pcbddc->n_ISForDofsLocal = nf; 1717 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1718 PetscContainer c; 1719 1720 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1721 if (c) { 1722 MatISLocalFields lf; 1723 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1724 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1725 } else { /* fallback, create the default fields if bs > 1 */ 1726 PetscInt i, n = matis->A->rmap->n; 1727 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1728 if (i > 1) { 1729 pcbddc->n_ISForDofsLocal = i; 1730 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1731 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1732 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1733 } 1734 } 1735 } 1736 } 1737 } else { 1738 PetscInt i; 1739 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1740 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1741 } 1742 } 1743 } 1744 1745 boundary: 1746 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1747 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1748 } else if (pcbddc->DirichletBoundariesLocal) { 1749 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1750 } 1751 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1752 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1753 } else if (pcbddc->NeumannBoundariesLocal) { 1754 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1755 } 1756 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1757 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1758 } 1759 ierr = VecDestroy(&global);CHKERRQ(ierr); 1760 ierr = VecDestroy(&local);CHKERRQ(ierr); 1761 /* detect local disconnected subdomains if requested (use matis->A) */ 1762 if (pcbddc->detect_disconnected) { 1763 IS primalv = NULL; 1764 PetscInt i; 1765 1766 for (i=0;i<pcbddc->n_local_subs;i++) { 1767 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1768 } 1769 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1770 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1771 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1772 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1773 } 1774 /* early stage corner detection */ 1775 { 1776 DM dm; 1777 1778 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1779 if (dm) { 1780 PetscBool isda; 1781 1782 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1783 if (isda) { 1784 ISLocalToGlobalMapping l2l; 1785 IS corners; 1786 Mat lA; 1787 1788 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1789 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1790 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1791 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1792 if (l2l) { 1793 const PetscInt *idx; 1794 PetscInt bs,*idxout,n; 1795 1796 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1797 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1798 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1799 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1800 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1801 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1804 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1805 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1806 } else { /* not from DMDA */ 1807 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1808 } 1809 } 1810 } 1811 } 1812 PetscFunctionReturn(0); 1813 } 1814 1815 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1816 { 1817 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1818 PetscErrorCode ierr; 1819 IS nis; 1820 const PetscInt *idxs; 1821 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1822 PetscBool *ld; 1823 1824 PetscFunctionBegin; 1825 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1826 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1827 if (mop == MPI_LAND) { 1828 /* init rootdata with true */ 1829 ld = (PetscBool*) matis->sf_rootdata; 1830 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1831 } else { 1832 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1833 } 1834 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1835 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1836 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1837 ld = (PetscBool*) matis->sf_leafdata; 1838 for (i=0;i<nd;i++) 1839 if (-1 < idxs[i] && idxs[i] < n) 1840 ld[idxs[i]] = PETSC_TRUE; 1841 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1842 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1843 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1844 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1845 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1846 if (mop == MPI_LAND) { 1847 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1848 } else { 1849 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1850 } 1851 for (i=0,nnd=0;i<n;i++) 1852 if (ld[i]) 1853 nidxs[nnd++] = i; 1854 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1855 ierr = ISDestroy(is);CHKERRQ(ierr); 1856 *is = nis; 1857 PetscFunctionReturn(0); 1858 } 1859 1860 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1861 { 1862 PC_IS *pcis = (PC_IS*)(pc->data); 1863 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1864 PetscErrorCode ierr; 1865 1866 PetscFunctionBegin; 1867 if (!pcbddc->benign_have_null) { 1868 PetscFunctionReturn(0); 1869 } 1870 if (pcbddc->ChangeOfBasisMatrix) { 1871 Vec swap; 1872 1873 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1874 swap = pcbddc->work_change; 1875 pcbddc->work_change = r; 1876 r = swap; 1877 } 1878 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1879 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1880 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1881 ierr = VecSet(z,0.);CHKERRQ(ierr); 1882 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1883 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1884 if (pcbddc->ChangeOfBasisMatrix) { 1885 pcbddc->work_change = r; 1886 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1887 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1888 } 1889 PetscFunctionReturn(0); 1890 } 1891 1892 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1893 { 1894 PCBDDCBenignMatMult_ctx ctx; 1895 PetscErrorCode ierr; 1896 PetscBool apply_right,apply_left,reset_x; 1897 1898 PetscFunctionBegin; 1899 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1900 if (transpose) { 1901 apply_right = ctx->apply_left; 1902 apply_left = ctx->apply_right; 1903 } else { 1904 apply_right = ctx->apply_right; 1905 apply_left = ctx->apply_left; 1906 } 1907 reset_x = PETSC_FALSE; 1908 if (apply_right) { 1909 const PetscScalar *ax; 1910 PetscInt nl,i; 1911 1912 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1913 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1914 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1915 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1916 for (i=0;i<ctx->benign_n;i++) { 1917 PetscScalar sum,val; 1918 const PetscInt *idxs; 1919 PetscInt nz,j; 1920 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1921 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1922 sum = 0.; 1923 if (ctx->apply_p0) { 1924 val = ctx->work[idxs[nz-1]]; 1925 for (j=0;j<nz-1;j++) { 1926 sum += ctx->work[idxs[j]]; 1927 ctx->work[idxs[j]] += val; 1928 } 1929 } else { 1930 for (j=0;j<nz-1;j++) { 1931 sum += ctx->work[idxs[j]]; 1932 } 1933 } 1934 ctx->work[idxs[nz-1]] -= sum; 1935 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1936 } 1937 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1938 reset_x = PETSC_TRUE; 1939 } 1940 if (transpose) { 1941 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1942 } else { 1943 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1944 } 1945 if (reset_x) { 1946 ierr = VecResetArray(x);CHKERRQ(ierr); 1947 } 1948 if (apply_left) { 1949 PetscScalar *ay; 1950 PetscInt i; 1951 1952 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1953 for (i=0;i<ctx->benign_n;i++) { 1954 PetscScalar sum,val; 1955 const PetscInt *idxs; 1956 PetscInt nz,j; 1957 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1958 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 val = -ay[idxs[nz-1]]; 1960 if (ctx->apply_p0) { 1961 sum = 0.; 1962 for (j=0;j<nz-1;j++) { 1963 sum += ay[idxs[j]]; 1964 ay[idxs[j]] += val; 1965 } 1966 ay[idxs[nz-1]] += sum; 1967 } else { 1968 for (j=0;j<nz-1;j++) { 1969 ay[idxs[j]] += val; 1970 } 1971 ay[idxs[nz-1]] = 0.; 1972 } 1973 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1974 } 1975 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1976 } 1977 PetscFunctionReturn(0); 1978 } 1979 1980 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1981 { 1982 PetscErrorCode ierr; 1983 1984 PetscFunctionBegin; 1985 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1986 PetscFunctionReturn(0); 1987 } 1988 1989 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1990 { 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1995 PetscFunctionReturn(0); 1996 } 1997 1998 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1999 { 2000 PC_IS *pcis = (PC_IS*)pc->data; 2001 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2002 PCBDDCBenignMatMult_ctx ctx; 2003 PetscErrorCode ierr; 2004 2005 PetscFunctionBegin; 2006 if (!restore) { 2007 Mat A_IB,A_BI; 2008 PetscScalar *work; 2009 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2010 2011 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2012 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2013 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2014 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2015 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2016 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2017 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2018 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2019 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2020 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2021 ctx->apply_left = PETSC_TRUE; 2022 ctx->apply_right = PETSC_FALSE; 2023 ctx->apply_p0 = PETSC_FALSE; 2024 ctx->benign_n = pcbddc->benign_n; 2025 if (reuse) { 2026 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2027 ctx->free = PETSC_FALSE; 2028 } else { /* TODO: could be optimized for successive solves */ 2029 ISLocalToGlobalMapping N_to_D; 2030 PetscInt i; 2031 2032 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2033 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2034 for (i=0;i<pcbddc->benign_n;i++) { 2035 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2036 } 2037 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2038 ctx->free = PETSC_TRUE; 2039 } 2040 ctx->A = pcis->A_IB; 2041 ctx->work = work; 2042 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2043 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2044 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2045 pcis->A_IB = A_IB; 2046 2047 /* A_BI as A_IB^T */ 2048 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2049 pcbddc->benign_original_mat = pcis->A_BI; 2050 pcis->A_BI = A_BI; 2051 } else { 2052 if (!pcbddc->benign_original_mat) { 2053 PetscFunctionReturn(0); 2054 } 2055 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2056 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2057 pcis->A_IB = ctx->A; 2058 ctx->A = NULL; 2059 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2060 pcis->A_BI = pcbddc->benign_original_mat; 2061 pcbddc->benign_original_mat = NULL; 2062 if (ctx->free) { 2063 PetscInt i; 2064 for (i=0;i<ctx->benign_n;i++) { 2065 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2066 } 2067 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2068 } 2069 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2070 ierr = PetscFree(ctx);CHKERRQ(ierr); 2071 } 2072 PetscFunctionReturn(0); 2073 } 2074 2075 /* used just in bddc debug mode */ 2076 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2077 { 2078 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2079 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2080 Mat An; 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2085 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2086 if (is1) { 2087 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2088 ierr = MatDestroy(&An);CHKERRQ(ierr); 2089 } else { 2090 *B = An; 2091 } 2092 PetscFunctionReturn(0); 2093 } 2094 2095 /* TODO: add reuse flag */ 2096 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2097 { 2098 Mat Bt; 2099 PetscScalar *a,*bdata; 2100 const PetscInt *ii,*ij; 2101 PetscInt m,n,i,nnz,*bii,*bij; 2102 PetscBool flg_row; 2103 PetscErrorCode ierr; 2104 2105 PetscFunctionBegin; 2106 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2107 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2108 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2109 nnz = n; 2110 for (i=0;i<ii[n];i++) { 2111 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2112 } 2113 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2114 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2115 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2116 nnz = 0; 2117 bii[0] = 0; 2118 for (i=0;i<n;i++) { 2119 PetscInt j; 2120 for (j=ii[i];j<ii[i+1];j++) { 2121 PetscScalar entry = a[j]; 2122 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2123 bij[nnz] = ij[j]; 2124 bdata[nnz] = entry; 2125 nnz++; 2126 } 2127 } 2128 bii[i+1] = nnz; 2129 } 2130 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2131 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2132 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2133 { 2134 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2135 b->free_a = PETSC_TRUE; 2136 b->free_ij = PETSC_TRUE; 2137 } 2138 *B = Bt; 2139 PetscFunctionReturn(0); 2140 } 2141 2142 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2143 { 2144 Mat B = NULL; 2145 DM dm; 2146 IS is_dummy,*cc_n; 2147 ISLocalToGlobalMapping l2gmap_dummy; 2148 PCBDDCGraph graph; 2149 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2150 PetscInt i,n; 2151 PetscInt *xadj,*adjncy; 2152 PetscBool isplex = PETSC_FALSE; 2153 PetscErrorCode ierr; 2154 2155 PetscFunctionBegin; 2156 if (ncc) *ncc = 0; 2157 if (cc) *cc = NULL; 2158 if (primalv) *primalv = NULL; 2159 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2160 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2161 if (!dm) { 2162 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2163 } 2164 if (dm) { 2165 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2166 } 2167 if (isplex) { /* this code has been modified from plexpartition.c */ 2168 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2169 PetscInt *adj = NULL; 2170 IS cellNumbering; 2171 const PetscInt *cellNum; 2172 PetscBool useCone, useClosure; 2173 PetscSection section; 2174 PetscSegBuffer adjBuffer; 2175 PetscSF sfPoint; 2176 PetscErrorCode ierr; 2177 2178 PetscFunctionBegin; 2179 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2180 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2181 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2182 /* Build adjacency graph via a section/segbuffer */ 2183 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2184 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2185 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2186 /* Always use FVM adjacency to create partitioner graph */ 2187 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2188 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2189 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2190 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2191 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2192 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2193 for (n = 0, p = pStart; p < pEnd; p++) { 2194 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2195 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2196 adjSize = PETSC_DETERMINE; 2197 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2198 for (a = 0; a < adjSize; ++a) { 2199 const PetscInt point = adj[a]; 2200 if (pStart <= point && point < pEnd) { 2201 PetscInt *PETSC_RESTRICT pBuf; 2202 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2203 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2204 *pBuf = point; 2205 } 2206 } 2207 n++; 2208 } 2209 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2210 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2211 /* Derive CSR graph from section/segbuffer */ 2212 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2213 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2214 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2215 for (idx = 0, p = pStart; p < pEnd; p++) { 2216 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2217 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2218 } 2219 xadj[n] = size; 2220 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2221 /* Clean up */ 2222 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2223 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2224 ierr = PetscFree(adj);CHKERRQ(ierr); 2225 graph->xadj = xadj; 2226 graph->adjncy = adjncy; 2227 } else { 2228 Mat A; 2229 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2230 2231 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2232 if (!A->rmap->N || !A->cmap->N) { 2233 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2234 PetscFunctionReturn(0); 2235 } 2236 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2237 if (!isseqaij && filter) { 2238 PetscBool isseqdense; 2239 2240 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2241 if (!isseqdense) { 2242 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2243 } else { /* TODO: rectangular case and LDA */ 2244 PetscScalar *array; 2245 PetscReal chop=1.e-6; 2246 2247 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2248 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2249 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2250 for (i=0;i<n;i++) { 2251 PetscInt j; 2252 for (j=i+1;j<n;j++) { 2253 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2254 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2255 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2256 } 2257 } 2258 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2259 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2260 } 2261 } else { 2262 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2263 B = A; 2264 } 2265 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2266 2267 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2268 if (filter) { 2269 PetscScalar *data; 2270 PetscInt j,cum; 2271 2272 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2273 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2274 cum = 0; 2275 for (i=0;i<n;i++) { 2276 PetscInt t; 2277 2278 for (j=xadj[i];j<xadj[i+1];j++) { 2279 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2280 continue; 2281 } 2282 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2283 } 2284 t = xadj_filtered[i]; 2285 xadj_filtered[i] = cum; 2286 cum += t; 2287 } 2288 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2289 graph->xadj = xadj_filtered; 2290 graph->adjncy = adjncy_filtered; 2291 } else { 2292 graph->xadj = xadj; 2293 graph->adjncy = adjncy; 2294 } 2295 } 2296 /* compute local connected components using PCBDDCGraph */ 2297 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2298 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2299 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2300 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2301 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2302 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2303 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2304 2305 /* partial clean up */ 2306 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2307 if (B) { 2308 PetscBool flg_row; 2309 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2310 ierr = MatDestroy(&B);CHKERRQ(ierr); 2311 } 2312 if (isplex) { 2313 ierr = PetscFree(xadj);CHKERRQ(ierr); 2314 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2315 } 2316 2317 /* get back data */ 2318 if (isplex) { 2319 if (ncc) *ncc = graph->ncc; 2320 if (cc || primalv) { 2321 Mat A; 2322 PetscBT btv,btvt; 2323 PetscSection subSection; 2324 PetscInt *ids,cum,cump,*cids,*pids; 2325 2326 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2327 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2328 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2329 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2330 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2331 2332 cids[0] = 0; 2333 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2334 PetscInt j; 2335 2336 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2337 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2338 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2339 2340 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2341 for (k = 0; k < 2*size; k += 2) { 2342 PetscInt s, p = closure[k], off, dof, cdof; 2343 2344 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2345 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2346 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2347 for (s = 0; s < dof-cdof; s++) { 2348 if (PetscBTLookupSet(btvt,off+s)) continue; 2349 if (!PetscBTLookup(btv,off+s)) { 2350 ids[cum++] = off+s; 2351 } else { /* cross-vertex */ 2352 pids[cump++] = off+s; 2353 } 2354 } 2355 } 2356 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2357 } 2358 cids[i+1] = cum; 2359 /* mark dofs as already assigned */ 2360 for (j = cids[i]; j < cids[i+1]; j++) { 2361 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2362 } 2363 } 2364 if (cc) { 2365 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2366 for (i = 0; i < graph->ncc; i++) { 2367 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2368 } 2369 *cc = cc_n; 2370 } 2371 if (primalv) { 2372 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2373 } 2374 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2375 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2376 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2377 } 2378 } else { 2379 if (ncc) *ncc = graph->ncc; 2380 if (cc) { 2381 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2382 for (i=0;i<graph->ncc;i++) { 2383 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2384 } 2385 *cc = cc_n; 2386 } 2387 } 2388 /* clean up graph */ 2389 graph->xadj = 0; 2390 graph->adjncy = 0; 2391 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2392 PetscFunctionReturn(0); 2393 } 2394 2395 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2396 { 2397 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2398 PC_IS* pcis = (PC_IS*)(pc->data); 2399 IS dirIS = NULL; 2400 PetscInt i; 2401 PetscErrorCode ierr; 2402 2403 PetscFunctionBegin; 2404 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2405 if (zerodiag) { 2406 Mat A; 2407 Vec vec3_N; 2408 PetscScalar *vals; 2409 const PetscInt *idxs; 2410 PetscInt nz,*count; 2411 2412 /* p0 */ 2413 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2414 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2415 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2416 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2417 for (i=0;i<nz;i++) vals[i] = 1.; 2418 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2419 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2420 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2421 /* v_I */ 2422 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2423 for (i=0;i<nz;i++) vals[i] = 0.; 2424 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2425 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2426 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2427 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2428 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2429 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2430 if (dirIS) { 2431 PetscInt n; 2432 2433 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2434 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2435 for (i=0;i<n;i++) vals[i] = 0.; 2436 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2437 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2438 } 2439 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2440 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2441 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2442 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2443 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2444 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2445 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2446 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2447 ierr = PetscFree(vals);CHKERRQ(ierr); 2448 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2449 2450 /* there should not be any pressure dofs lying on the interface */ 2451 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2454 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2455 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2456 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2457 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2458 ierr = PetscFree(count);CHKERRQ(ierr); 2459 } 2460 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2461 2462 /* check PCBDDCBenignGetOrSetP0 */ 2463 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2464 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2465 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2466 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2467 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2468 for (i=0;i<pcbddc->benign_n;i++) { 2469 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2470 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2471 } 2472 PetscFunctionReturn(0); 2473 } 2474 2475 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2476 { 2477 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2478 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2479 PetscInt nz,n; 2480 PetscInt *interior_dofs,n_interior_dofs,nneu; 2481 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2482 PetscErrorCode ierr; 2483 2484 PetscFunctionBegin; 2485 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2486 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2487 for (n=0;n<pcbddc->benign_n;n++) { 2488 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2489 } 2490 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2491 pcbddc->benign_n = 0; 2492 2493 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2494 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2495 Checks if all the pressure dofs in each subdomain have a zero diagonal 2496 If not, a change of basis on pressures is not needed 2497 since the local Schur complements are already SPD 2498 */ 2499 has_null_pressures = PETSC_TRUE; 2500 have_null = PETSC_TRUE; 2501 if (pcbddc->n_ISForDofsLocal) { 2502 IS iP = NULL; 2503 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2504 2505 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2506 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2507 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2508 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2509 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2510 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2511 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2512 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2513 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2514 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2515 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2516 if (iP) { 2517 IS newpressures; 2518 2519 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2520 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2521 pressures = newpressures; 2522 } 2523 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2524 if (!sorted) { 2525 ierr = ISSort(pressures);CHKERRQ(ierr); 2526 } 2527 } else { 2528 pressures = NULL; 2529 } 2530 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2531 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2532 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2533 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2534 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2535 if (!sorted) { 2536 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2537 } 2538 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2539 zerodiag_save = zerodiag; 2540 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2541 if (!nz) { 2542 if (n) have_null = PETSC_FALSE; 2543 has_null_pressures = PETSC_FALSE; 2544 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2545 } 2546 recompute_zerodiag = PETSC_FALSE; 2547 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2548 zerodiag_subs = NULL; 2549 pcbddc->benign_n = 0; 2550 n_interior_dofs = 0; 2551 interior_dofs = NULL; 2552 nneu = 0; 2553 if (pcbddc->NeumannBoundariesLocal) { 2554 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2555 } 2556 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2557 if (checkb) { /* need to compute interior nodes */ 2558 PetscInt n,i,j; 2559 PetscInt n_neigh,*neigh,*n_shared,**shared; 2560 PetscInt *iwork; 2561 2562 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2563 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2564 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2565 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2566 for (i=1;i<n_neigh;i++) 2567 for (j=0;j<n_shared[i];j++) 2568 iwork[shared[i][j]] += 1; 2569 for (i=0;i<n;i++) 2570 if (!iwork[i]) 2571 interior_dofs[n_interior_dofs++] = i; 2572 ierr = PetscFree(iwork);CHKERRQ(ierr); 2573 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2574 } 2575 if (has_null_pressures) { 2576 IS *subs; 2577 PetscInt nsubs,i,j,nl; 2578 const PetscInt *idxs; 2579 PetscScalar *array; 2580 Vec *work; 2581 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2582 2583 subs = pcbddc->local_subs; 2584 nsubs = pcbddc->n_local_subs; 2585 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2586 if (checkb) { 2587 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2588 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2589 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2590 /* work[0] = 1_p */ 2591 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2592 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2593 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2594 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2595 /* work[0] = 1_v */ 2596 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2597 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2598 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2599 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2600 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2601 } 2602 if (nsubs > 1) { 2603 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2604 for (i=0;i<nsubs;i++) { 2605 ISLocalToGlobalMapping l2g; 2606 IS t_zerodiag_subs; 2607 PetscInt nl; 2608 2609 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2610 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2612 if (nl) { 2613 PetscBool valid = PETSC_TRUE; 2614 2615 if (checkb) { 2616 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2617 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2618 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2619 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2620 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2621 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2622 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2623 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2624 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2625 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2626 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2627 for (j=0;j<n_interior_dofs;j++) { 2628 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2629 valid = PETSC_FALSE; 2630 break; 2631 } 2632 } 2633 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2634 } 2635 if (valid && nneu) { 2636 const PetscInt *idxs; 2637 PetscInt nzb; 2638 2639 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2640 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2641 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2642 if (nzb) valid = PETSC_FALSE; 2643 } 2644 if (valid && pressures) { 2645 IS t_pressure_subs; 2646 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2647 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2648 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2649 } 2650 if (valid) { 2651 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2652 pcbddc->benign_n++; 2653 } else { 2654 recompute_zerodiag = PETSC_TRUE; 2655 } 2656 } 2657 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2658 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2659 } 2660 } else { /* there's just one subdomain (or zero if they have not been detected */ 2661 PetscBool valid = PETSC_TRUE; 2662 2663 if (nneu) valid = PETSC_FALSE; 2664 if (valid && pressures) { 2665 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2666 } 2667 if (valid && checkb) { 2668 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2669 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2670 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2671 for (j=0;j<n_interior_dofs;j++) { 2672 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2673 valid = PETSC_FALSE; 2674 break; 2675 } 2676 } 2677 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2678 } 2679 if (valid) { 2680 pcbddc->benign_n = 1; 2681 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2682 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2683 zerodiag_subs[0] = zerodiag; 2684 } 2685 } 2686 if (checkb) { 2687 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2688 } 2689 } 2690 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2691 2692 if (!pcbddc->benign_n) { 2693 PetscInt n; 2694 2695 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2696 recompute_zerodiag = PETSC_FALSE; 2697 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2698 if (n) { 2699 has_null_pressures = PETSC_FALSE; 2700 have_null = PETSC_FALSE; 2701 } 2702 } 2703 2704 /* final check for null pressures */ 2705 if (zerodiag && pressures) { 2706 PetscInt nz,np; 2707 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2708 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2709 if (nz != np) have_null = PETSC_FALSE; 2710 } 2711 2712 if (recompute_zerodiag) { 2713 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2714 if (pcbddc->benign_n == 1) { 2715 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2716 zerodiag = zerodiag_subs[0]; 2717 } else { 2718 PetscInt i,nzn,*new_idxs; 2719 2720 nzn = 0; 2721 for (i=0;i<pcbddc->benign_n;i++) { 2722 PetscInt ns; 2723 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2724 nzn += ns; 2725 } 2726 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2727 nzn = 0; 2728 for (i=0;i<pcbddc->benign_n;i++) { 2729 PetscInt ns,*idxs; 2730 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2731 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2732 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2733 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2734 nzn += ns; 2735 } 2736 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2738 } 2739 have_null = PETSC_FALSE; 2740 } 2741 2742 /* Prepare matrix to compute no-net-flux */ 2743 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2744 Mat A,loc_divudotp; 2745 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2746 IS row,col,isused = NULL; 2747 PetscInt M,N,n,st,n_isused; 2748 2749 if (pressures) { 2750 isused = pressures; 2751 } else { 2752 isused = zerodiag_save; 2753 } 2754 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2755 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2756 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2757 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2758 n_isused = 0; 2759 if (isused) { 2760 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2761 } 2762 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2763 st = st-n_isused; 2764 if (n) { 2765 const PetscInt *gidxs; 2766 2767 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2768 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2769 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2770 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2772 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2773 } else { 2774 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2775 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2776 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2777 } 2778 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2779 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2780 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2781 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2782 ierr = ISDestroy(&row);CHKERRQ(ierr); 2783 ierr = ISDestroy(&col);CHKERRQ(ierr); 2784 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2785 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2786 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2787 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2788 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2789 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2790 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2791 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2792 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2793 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2794 } 2795 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2796 2797 /* change of basis and p0 dofs */ 2798 if (has_null_pressures) { 2799 IS zerodiagc; 2800 const PetscInt *idxs,*idxsc; 2801 PetscInt i,s,*nnz; 2802 2803 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2804 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2805 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2806 /* local change of basis for pressures */ 2807 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2808 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2809 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2810 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2811 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2812 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2813 for (i=0;i<pcbddc->benign_n;i++) { 2814 PetscInt nzs,j; 2815 2816 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2817 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2818 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2819 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2820 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2821 } 2822 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2823 ierr = PetscFree(nnz);CHKERRQ(ierr); 2824 /* set identity on velocities */ 2825 for (i=0;i<n-nz;i++) { 2826 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2827 } 2828 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2829 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2830 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2831 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2832 /* set change on pressures */ 2833 for (s=0;s<pcbddc->benign_n;s++) { 2834 PetscScalar *array; 2835 PetscInt nzs; 2836 2837 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2838 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2839 for (i=0;i<nzs-1;i++) { 2840 PetscScalar vals[2]; 2841 PetscInt cols[2]; 2842 2843 cols[0] = idxs[i]; 2844 cols[1] = idxs[nzs-1]; 2845 vals[0] = 1.; 2846 vals[1] = 1.; 2847 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2848 } 2849 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2850 for (i=0;i<nzs-1;i++) array[i] = -1.; 2851 array[nzs-1] = 1.; 2852 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2853 /* store local idxs for p0 */ 2854 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2855 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2856 ierr = PetscFree(array);CHKERRQ(ierr); 2857 } 2858 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2859 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2860 /* project if needed */ 2861 if (pcbddc->benign_change_explicit) { 2862 Mat M; 2863 2864 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2865 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2866 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2867 ierr = MatDestroy(&M);CHKERRQ(ierr); 2868 } 2869 /* store global idxs for p0 */ 2870 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2871 } 2872 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2873 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2874 2875 /* determines if the coarse solver will be singular or not */ 2876 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2877 /* determines if the problem has subdomains with 0 pressure block */ 2878 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2879 *zerodiaglocal = zerodiag; 2880 PetscFunctionReturn(0); 2881 } 2882 2883 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2884 { 2885 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2886 PetscScalar *array; 2887 PetscErrorCode ierr; 2888 2889 PetscFunctionBegin; 2890 if (!pcbddc->benign_sf) { 2891 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2892 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2893 } 2894 if (get) { 2895 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2896 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2897 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2898 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2899 } else { 2900 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2901 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2902 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2903 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2904 } 2905 PetscFunctionReturn(0); 2906 } 2907 2908 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2909 { 2910 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2911 PetscErrorCode ierr; 2912 2913 PetscFunctionBegin; 2914 /* TODO: add error checking 2915 - avoid nested pop (or push) calls. 2916 - cannot push before pop. 2917 - cannot call this if pcbddc->local_mat is NULL 2918 */ 2919 if (!pcbddc->benign_n) { 2920 PetscFunctionReturn(0); 2921 } 2922 if (pop) { 2923 if (pcbddc->benign_change_explicit) { 2924 IS is_p0; 2925 MatReuse reuse; 2926 2927 /* extract B_0 */ 2928 reuse = MAT_INITIAL_MATRIX; 2929 if (pcbddc->benign_B0) { 2930 reuse = MAT_REUSE_MATRIX; 2931 } 2932 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2933 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2934 /* remove rows and cols from local problem */ 2935 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2936 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2937 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2938 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2939 } else { 2940 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2941 PetscScalar *vals; 2942 PetscInt i,n,*idxs_ins; 2943 2944 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2945 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2946 if (!pcbddc->benign_B0) { 2947 PetscInt *nnz; 2948 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2949 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2950 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2951 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2952 for (i=0;i<pcbddc->benign_n;i++) { 2953 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2954 nnz[i] = n - nnz[i]; 2955 } 2956 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2957 ierr = PetscFree(nnz);CHKERRQ(ierr); 2958 } 2959 2960 for (i=0;i<pcbddc->benign_n;i++) { 2961 PetscScalar *array; 2962 PetscInt *idxs,j,nz,cum; 2963 2964 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2965 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2966 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2967 for (j=0;j<nz;j++) vals[j] = 1.; 2968 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2969 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2970 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2971 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2972 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2973 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2974 cum = 0; 2975 for (j=0;j<n;j++) { 2976 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2977 vals[cum] = array[j]; 2978 idxs_ins[cum] = j; 2979 cum++; 2980 } 2981 } 2982 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2983 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2984 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2985 } 2986 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2987 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2988 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2989 } 2990 } else { /* push */ 2991 if (pcbddc->benign_change_explicit) { 2992 PetscInt i; 2993 2994 for (i=0;i<pcbddc->benign_n;i++) { 2995 PetscScalar *B0_vals; 2996 PetscInt *B0_cols,B0_ncol; 2997 2998 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2999 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3000 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3001 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3002 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3003 } 3004 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3005 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3006 } else { 3007 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3008 } 3009 } 3010 PetscFunctionReturn(0); 3011 } 3012 3013 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3014 { 3015 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3016 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3017 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3018 PetscBLASInt *B_iwork,*B_ifail; 3019 PetscScalar *work,lwork; 3020 PetscScalar *St,*S,*eigv; 3021 PetscScalar *Sarray,*Starray; 3022 PetscReal *eigs,thresh; 3023 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3024 PetscBool allocated_S_St; 3025 #if defined(PETSC_USE_COMPLEX) 3026 PetscReal *rwork; 3027 #endif 3028 PetscErrorCode ierr; 3029 3030 PetscFunctionBegin; 3031 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3032 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3033 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3034 3035 if (pcbddc->dbg_flag) { 3036 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3038 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3039 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3040 } 3041 3042 if (pcbddc->dbg_flag) { 3043 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3044 } 3045 3046 /* max size of subsets */ 3047 mss = 0; 3048 for (i=0;i<sub_schurs->n_subs;i++) { 3049 PetscInt subset_size; 3050 3051 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3052 mss = PetscMax(mss,subset_size); 3053 } 3054 3055 /* min/max and threshold */ 3056 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3057 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3058 nmax = PetscMax(nmin,nmax); 3059 allocated_S_St = PETSC_FALSE; 3060 if (nmin) { 3061 allocated_S_St = PETSC_TRUE; 3062 } 3063 3064 /* allocate lapack workspace */ 3065 cum = cum2 = 0; 3066 maxneigs = 0; 3067 for (i=0;i<sub_schurs->n_subs;i++) { 3068 PetscInt n,subset_size; 3069 3070 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3071 n = PetscMin(subset_size,nmax); 3072 cum += subset_size; 3073 cum2 += subset_size*n; 3074 maxneigs = PetscMax(maxneigs,n); 3075 } 3076 if (mss) { 3077 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3078 PetscBLASInt B_itype = 1; 3079 PetscBLASInt B_N = mss; 3080 PetscReal zero = 0.0; 3081 PetscReal eps = 0.0; /* dlamch? */ 3082 3083 B_lwork = -1; 3084 S = NULL; 3085 St = NULL; 3086 eigs = NULL; 3087 eigv = NULL; 3088 B_iwork = NULL; 3089 B_ifail = NULL; 3090 #if defined(PETSC_USE_COMPLEX) 3091 rwork = NULL; 3092 #endif 3093 thresh = 1.0; 3094 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3095 #if defined(PETSC_USE_COMPLEX) 3096 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3097 #else 3098 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3099 #endif 3100 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3101 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3102 } else { 3103 /* TODO */ 3104 } 3105 } else { 3106 lwork = 0; 3107 } 3108 3109 nv = 0; 3110 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3111 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3112 } 3113 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3114 if (allocated_S_St) { 3115 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3116 } 3117 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3118 #if defined(PETSC_USE_COMPLEX) 3119 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3120 #endif 3121 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3122 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3123 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3124 nv+cum,&pcbddc->adaptive_constraints_idxs, 3125 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3126 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3127 3128 maxneigs = 0; 3129 cum = cumarray = 0; 3130 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3131 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3132 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3133 const PetscInt *idxs; 3134 3135 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3136 for (cum=0;cum<nv;cum++) { 3137 pcbddc->adaptive_constraints_n[cum] = 1; 3138 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3139 pcbddc->adaptive_constraints_data[cum] = 1.0; 3140 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3141 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3142 } 3143 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3144 } 3145 3146 if (mss) { /* multilevel */ 3147 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3148 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3149 } 3150 3151 thresh = pcbddc->adaptive_threshold; 3152 for (i=0;i<sub_schurs->n_subs;i++) { 3153 const PetscInt *idxs; 3154 PetscReal upper,lower; 3155 PetscInt j,subset_size,eigs_start = 0; 3156 PetscBLASInt B_N; 3157 PetscBool same_data = PETSC_FALSE; 3158 3159 if (pcbddc->use_deluxe_scaling) { 3160 upper = PETSC_MAX_REAL; 3161 lower = thresh; 3162 } else { 3163 upper = 1./thresh; 3164 lower = 0.; 3165 } 3166 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3167 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3168 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3169 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3170 if (sub_schurs->is_hermitian) { 3171 PetscInt j,k; 3172 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3173 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3174 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3175 } 3176 for (j=0;j<subset_size;j++) { 3177 for (k=j;k<subset_size;k++) { 3178 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3179 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3180 } 3181 } 3182 } else { 3183 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3184 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3185 } 3186 } else { 3187 S = Sarray + cumarray; 3188 St = Starray + cumarray; 3189 } 3190 /* see if we can save some work */ 3191 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3192 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3193 } 3194 3195 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3196 B_neigs = 0; 3197 } else { 3198 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3199 PetscBLASInt B_itype = 1; 3200 PetscBLASInt B_IL, B_IU; 3201 PetscReal eps = -1.0; /* dlamch? */ 3202 PetscInt nmin_s; 3203 PetscBool compute_range = PETSC_FALSE; 3204 3205 if (pcbddc->dbg_flag) { 3206 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3207 } 3208 3209 compute_range = PETSC_FALSE; 3210 if (thresh > 1.+PETSC_SMALL && !same_data) { 3211 compute_range = PETSC_TRUE; 3212 } 3213 3214 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3215 if (compute_range) { 3216 3217 /* ask for eigenvalues larger than thresh */ 3218 #if defined(PETSC_USE_COMPLEX) 3219 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3220 #else 3221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3222 #endif 3223 } else if (!same_data) { 3224 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3225 B_IL = 1; 3226 #if defined(PETSC_USE_COMPLEX) 3227 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3228 #else 3229 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3230 #endif 3231 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3232 PetscInt k; 3233 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3234 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3235 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3236 nmin = nmax; 3237 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3238 for (k=0;k<nmax;k++) { 3239 eigs[k] = 1./PETSC_SMALL; 3240 eigv[k*(subset_size+1)] = 1.0; 3241 } 3242 } 3243 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3244 if (B_ierr) { 3245 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3246 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3247 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3248 } 3249 3250 if (B_neigs > nmax) { 3251 if (pcbddc->dbg_flag) { 3252 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3253 } 3254 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3255 B_neigs = nmax; 3256 } 3257 3258 nmin_s = PetscMin(nmin,B_N); 3259 if (B_neigs < nmin_s) { 3260 PetscBLASInt B_neigs2; 3261 3262 if (pcbddc->use_deluxe_scaling) { 3263 B_IL = B_N - nmin_s + 1; 3264 B_IU = B_N - B_neigs; 3265 } else { 3266 B_IL = B_neigs + 1; 3267 B_IU = nmin_s; 3268 } 3269 if (pcbddc->dbg_flag) { 3270 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3271 } 3272 if (sub_schurs->is_hermitian) { 3273 PetscInt j,k; 3274 for (j=0;j<subset_size;j++) { 3275 for (k=j;k<subset_size;k++) { 3276 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3277 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3278 } 3279 } 3280 } else { 3281 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3282 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3283 } 3284 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3285 #if defined(PETSC_USE_COMPLEX) 3286 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3287 #else 3288 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3289 #endif 3290 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3291 B_neigs += B_neigs2; 3292 } 3293 if (B_ierr) { 3294 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3295 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3296 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3297 } 3298 if (pcbddc->dbg_flag) { 3299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3300 for (j=0;j<B_neigs;j++) { 3301 if (eigs[j] == 0.0) { 3302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3303 } else { 3304 if (pcbddc->use_deluxe_scaling) { 3305 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3306 } else { 3307 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3308 } 3309 } 3310 } 3311 } 3312 } else { 3313 /* TODO */ 3314 } 3315 } 3316 /* change the basis back to the original one */ 3317 if (sub_schurs->change) { 3318 Mat change,phi,phit; 3319 3320 if (pcbddc->dbg_flag > 2) { 3321 PetscInt ii; 3322 for (ii=0;ii<B_neigs;ii++) { 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3324 for (j=0;j<B_N;j++) { 3325 #if defined(PETSC_USE_COMPLEX) 3326 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3327 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3329 #else 3330 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3331 #endif 3332 } 3333 } 3334 } 3335 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3336 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3337 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3338 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3339 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3340 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3341 } 3342 maxneigs = PetscMax(B_neigs,maxneigs); 3343 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3344 if (B_neigs) { 3345 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3346 3347 if (pcbddc->dbg_flag > 1) { 3348 PetscInt ii; 3349 for (ii=0;ii<B_neigs;ii++) { 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3351 for (j=0;j<B_N;j++) { 3352 #if defined(PETSC_USE_COMPLEX) 3353 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3354 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3355 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3356 #else 3357 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3358 #endif 3359 } 3360 } 3361 } 3362 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3363 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3364 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3365 cum++; 3366 } 3367 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3368 /* shift for next computation */ 3369 cumarray += subset_size*subset_size; 3370 } 3371 if (pcbddc->dbg_flag) { 3372 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3373 } 3374 3375 if (mss) { 3376 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3377 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3378 /* destroy matrices (junk) */ 3379 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3380 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3381 } 3382 if (allocated_S_St) { 3383 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3384 } 3385 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3386 #if defined(PETSC_USE_COMPLEX) 3387 ierr = PetscFree(rwork);CHKERRQ(ierr); 3388 #endif 3389 if (pcbddc->dbg_flag) { 3390 PetscInt maxneigs_r; 3391 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3392 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3393 } 3394 PetscFunctionReturn(0); 3395 } 3396 3397 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3398 { 3399 PetscScalar *coarse_submat_vals; 3400 PetscErrorCode ierr; 3401 3402 PetscFunctionBegin; 3403 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3404 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3405 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3406 3407 /* Setup local neumann solver ksp_R */ 3408 /* PCBDDCSetUpLocalScatters should be called first! */ 3409 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3410 3411 /* 3412 Setup local correction and local part of coarse basis. 3413 Gives back the dense local part of the coarse matrix in column major ordering 3414 */ 3415 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3416 3417 /* Compute total number of coarse nodes and setup coarse solver */ 3418 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3419 3420 /* free */ 3421 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3422 PetscFunctionReturn(0); 3423 } 3424 3425 PetscErrorCode PCBDDCResetCustomization(PC pc) 3426 { 3427 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3428 PetscErrorCode ierr; 3429 3430 PetscFunctionBegin; 3431 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3432 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3433 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3434 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3435 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3436 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3437 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3438 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3439 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3440 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3441 PetscFunctionReturn(0); 3442 } 3443 3444 PetscErrorCode PCBDDCResetTopography(PC pc) 3445 { 3446 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3447 PetscInt i; 3448 PetscErrorCode ierr; 3449 3450 PetscFunctionBegin; 3451 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3452 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3454 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3455 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3456 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3457 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3458 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3459 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3460 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3461 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3462 for (i=0;i<pcbddc->n_local_subs;i++) { 3463 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3464 } 3465 pcbddc->n_local_subs = 0; 3466 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3467 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3468 pcbddc->graphanalyzed = PETSC_FALSE; 3469 pcbddc->recompute_topography = PETSC_TRUE; 3470 PetscFunctionReturn(0); 3471 } 3472 3473 PetscErrorCode PCBDDCResetSolvers(PC pc) 3474 { 3475 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3476 PetscErrorCode ierr; 3477 3478 PetscFunctionBegin; 3479 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3480 if (pcbddc->coarse_phi_B) { 3481 PetscScalar *array; 3482 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3483 ierr = PetscFree(array);CHKERRQ(ierr); 3484 } 3485 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3487 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3488 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3489 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3490 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3491 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3492 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3493 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3494 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3495 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3496 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3497 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3498 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3499 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3500 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3501 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3502 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3503 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3504 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3505 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3506 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3507 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3508 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3509 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3510 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3512 if (pcbddc->benign_zerodiag_subs) { 3513 PetscInt i; 3514 for (i=0;i<pcbddc->benign_n;i++) { 3515 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3516 } 3517 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3518 } 3519 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3520 PetscFunctionReturn(0); 3521 } 3522 3523 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3524 { 3525 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3526 PC_IS *pcis = (PC_IS*)pc->data; 3527 VecType impVecType; 3528 PetscInt n_constraints,n_R,old_size; 3529 PetscErrorCode ierr; 3530 3531 PetscFunctionBegin; 3532 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3533 n_R = pcis->n - pcbddc->n_vertices; 3534 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3535 /* local work vectors (try to avoid unneeded work)*/ 3536 /* R nodes */ 3537 old_size = -1; 3538 if (pcbddc->vec1_R) { 3539 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3540 } 3541 if (n_R != old_size) { 3542 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3543 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3544 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3545 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3546 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3547 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3548 } 3549 /* local primal dofs */ 3550 old_size = -1; 3551 if (pcbddc->vec1_P) { 3552 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3553 } 3554 if (pcbddc->local_primal_size != old_size) { 3555 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3556 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3557 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3558 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3559 } 3560 /* local explicit constraints */ 3561 old_size = -1; 3562 if (pcbddc->vec1_C) { 3563 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3564 } 3565 if (n_constraints && n_constraints != old_size) { 3566 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3567 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3568 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3569 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3570 } 3571 PetscFunctionReturn(0); 3572 } 3573 3574 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3575 { 3576 PetscErrorCode ierr; 3577 /* pointers to pcis and pcbddc */ 3578 PC_IS* pcis = (PC_IS*)pc->data; 3579 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3580 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3581 /* submatrices of local problem */ 3582 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3583 /* submatrices of local coarse problem */ 3584 Mat S_VV,S_CV,S_VC,S_CC; 3585 /* working matrices */ 3586 Mat C_CR; 3587 /* additional working stuff */ 3588 PC pc_R; 3589 Mat F,Brhs = NULL; 3590 Vec dummy_vec; 3591 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3592 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3593 PetscScalar *work; 3594 PetscInt *idx_V_B; 3595 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3596 PetscInt i,n_R,n_D,n_B; 3597 3598 /* some shortcuts to scalars */ 3599 PetscScalar one=1.0,m_one=-1.0; 3600 3601 PetscFunctionBegin; 3602 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3603 3604 /* Set Non-overlapping dimensions */ 3605 n_vertices = pcbddc->n_vertices; 3606 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3607 n_B = pcis->n_B; 3608 n_D = pcis->n - n_B; 3609 n_R = pcis->n - n_vertices; 3610 3611 /* vertices in boundary numbering */ 3612 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3613 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3614 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3615 3616 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3617 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3619 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3620 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3621 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3622 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3623 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3624 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3625 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3626 3627 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3628 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3629 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3630 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3631 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3632 lda_rhs = n_R; 3633 need_benign_correction = PETSC_FALSE; 3634 if (isLU || isILU || isCHOL) { 3635 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3636 } else if (sub_schurs && sub_schurs->reuse_solver) { 3637 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3638 MatFactorType type; 3639 3640 F = reuse_solver->F; 3641 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3642 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3643 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3644 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3645 } else { 3646 F = NULL; 3647 } 3648 3649 /* determine if we can use a sparse right-hand side */ 3650 sparserhs = PETSC_FALSE; 3651 if (F) { 3652 MatSolverType solver; 3653 3654 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3655 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3656 } 3657 3658 /* allocate workspace */ 3659 n = 0; 3660 if (n_constraints) { 3661 n += lda_rhs*n_constraints; 3662 } 3663 if (n_vertices) { 3664 n = PetscMax(2*lda_rhs*n_vertices,n); 3665 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3666 } 3667 if (!pcbddc->symmetric_primal) { 3668 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3669 } 3670 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3671 3672 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3673 dummy_vec = NULL; 3674 if (need_benign_correction && lda_rhs != n_R && F) { 3675 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3676 } 3677 3678 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3679 if (n_constraints) { 3680 Mat M3,C_B; 3681 IS is_aux; 3682 PetscScalar *array,*array2; 3683 3684 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3685 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3686 3687 /* Extract constraints on R nodes: C_{CR} */ 3688 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3689 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3690 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3691 3692 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3693 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3694 if (!sparserhs) { 3695 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3696 for (i=0;i<n_constraints;i++) { 3697 const PetscScalar *row_cmat_values; 3698 const PetscInt *row_cmat_indices; 3699 PetscInt size_of_constraint,j; 3700 3701 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3702 for (j=0;j<size_of_constraint;j++) { 3703 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3704 } 3705 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3706 } 3707 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3708 } else { 3709 Mat tC_CR; 3710 3711 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3712 if (lda_rhs != n_R) { 3713 PetscScalar *aa; 3714 PetscInt r,*ii,*jj; 3715 PetscBool done; 3716 3717 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3718 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3719 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3720 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3721 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3722 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3723 } else { 3724 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3725 tC_CR = C_CR; 3726 } 3727 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3728 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3729 } 3730 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3731 if (F) { 3732 if (need_benign_correction) { 3733 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3734 3735 /* rhs is already zero on interior dofs, no need to change the rhs */ 3736 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3737 } 3738 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3739 if (need_benign_correction) { 3740 PetscScalar *marr; 3741 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3742 3743 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3744 if (lda_rhs != n_R) { 3745 for (i=0;i<n_constraints;i++) { 3746 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3747 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3748 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3749 } 3750 } else { 3751 for (i=0;i<n_constraints;i++) { 3752 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3753 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3754 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3755 } 3756 } 3757 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3758 } 3759 } else { 3760 PetscScalar *marr; 3761 3762 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3763 for (i=0;i<n_constraints;i++) { 3764 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3765 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3766 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3767 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3768 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3769 } 3770 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3771 } 3772 if (sparserhs) { 3773 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3774 } 3775 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3776 if (!pcbddc->switch_static) { 3777 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3778 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3779 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3780 for (i=0;i<n_constraints;i++) { 3781 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3782 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3783 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3784 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3785 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3786 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3787 } 3788 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3789 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3790 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3791 } else { 3792 if (lda_rhs != n_R) { 3793 IS dummy; 3794 3795 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3796 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3797 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3798 } else { 3799 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3800 pcbddc->local_auxmat2 = local_auxmat2_R; 3801 } 3802 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3803 } 3804 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3805 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3806 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3807 if (isCHOL) { 3808 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3809 } else { 3810 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3811 } 3812 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3813 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3814 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3815 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3816 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3817 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3818 } 3819 3820 /* Get submatrices from subdomain matrix */ 3821 if (n_vertices) { 3822 IS is_aux; 3823 PetscBool isseqaij; 3824 3825 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3826 IS tis; 3827 3828 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3829 ierr = ISSort(tis);CHKERRQ(ierr); 3830 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3831 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3832 } else { 3833 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3834 } 3835 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3837 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3838 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3839 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3840 } 3841 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3842 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3843 } 3844 3845 /* Matrix of coarse basis functions (local) */ 3846 if (pcbddc->coarse_phi_B) { 3847 PetscInt on_B,on_primal,on_D=n_D; 3848 if (pcbddc->coarse_phi_D) { 3849 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3850 } 3851 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3852 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3853 PetscScalar *marray; 3854 3855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3856 ierr = PetscFree(marray);CHKERRQ(ierr); 3857 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3858 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3859 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3860 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3861 } 3862 } 3863 3864 if (!pcbddc->coarse_phi_B) { 3865 PetscScalar *marr; 3866 3867 /* memory size */ 3868 n = n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3870 if (!pcbddc->symmetric_primal) n *= 2; 3871 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3873 marr += n_B*pcbddc->local_primal_size; 3874 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3876 marr += n_D*pcbddc->local_primal_size; 3877 } 3878 if (!pcbddc->symmetric_primal) { 3879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3880 marr += n_B*pcbddc->local_primal_size; 3881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3883 } 3884 } else { 3885 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3886 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3887 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3888 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3889 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3890 } 3891 } 3892 } 3893 3894 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3895 p0_lidx_I = NULL; 3896 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3897 const PetscInt *idxs; 3898 3899 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3901 for (i=0;i<pcbddc->benign_n;i++) { 3902 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3903 } 3904 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3905 } 3906 3907 /* vertices */ 3908 if (n_vertices) { 3909 PetscBool restoreavr = PETSC_FALSE; 3910 3911 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3912 3913 if (n_R) { 3914 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3915 PetscBLASInt B_N,B_one = 1; 3916 PetscScalar *x,*y; 3917 3918 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3919 if (need_benign_correction) { 3920 ISLocalToGlobalMapping RtoN; 3921 IS is_p0; 3922 PetscInt *idxs_p0,n; 3923 3924 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3925 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3926 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3927 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3928 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3930 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3931 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3932 } 3933 3934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3935 if (!sparserhs || need_benign_correction) { 3936 if (lda_rhs == n_R) { 3937 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3938 } else { 3939 PetscScalar *av,*array; 3940 const PetscInt *xadj,*adjncy; 3941 PetscInt n; 3942 PetscBool flg_row; 3943 3944 array = work+lda_rhs*n_vertices; 3945 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3946 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3947 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3948 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3949 for (i=0;i<n;i++) { 3950 PetscInt j; 3951 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3952 } 3953 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3954 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3956 } 3957 if (need_benign_correction) { 3958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3959 PetscScalar *marr; 3960 3961 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3962 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3963 3964 | 0 0 0 | (V) 3965 L = | 0 0 -1 | (P-p0) 3966 | 0 0 -1 | (p0) 3967 3968 */ 3969 for (i=0;i<reuse_solver->benign_n;i++) { 3970 const PetscScalar *vals; 3971 const PetscInt *idxs,*idxs_zero; 3972 PetscInt n,j,nz; 3973 3974 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3975 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3976 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3977 for (j=0;j<n;j++) { 3978 PetscScalar val = vals[j]; 3979 PetscInt k,col = idxs[j]; 3980 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3981 } 3982 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3983 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3984 } 3985 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3986 } 3987 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3988 Brhs = A_RV; 3989 } else { 3990 Mat tA_RVT,A_RVT; 3991 3992 if (!pcbddc->symmetric_primal) { 3993 /* A_RV already scaled by -1 */ 3994 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3995 } else { 3996 restoreavr = PETSC_TRUE; 3997 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3998 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3999 A_RVT = A_VR; 4000 } 4001 if (lda_rhs != n_R) { 4002 PetscScalar *aa; 4003 PetscInt r,*ii,*jj; 4004 PetscBool done; 4005 4006 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4007 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4008 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4009 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4010 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4011 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4012 } else { 4013 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4014 tA_RVT = A_RVT; 4015 } 4016 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4017 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4018 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4019 } 4020 if (F) { 4021 /* need to correct the rhs */ 4022 if (need_benign_correction) { 4023 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4024 PetscScalar *marr; 4025 4026 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4027 if (lda_rhs != n_R) { 4028 for (i=0;i<n_vertices;i++) { 4029 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4030 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4031 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4032 } 4033 } else { 4034 for (i=0;i<n_vertices;i++) { 4035 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4036 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4037 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4038 } 4039 } 4040 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4041 } 4042 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4043 if (restoreavr) { 4044 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4045 } 4046 /* need to correct the solution */ 4047 if (need_benign_correction) { 4048 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4049 PetscScalar *marr; 4050 4051 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4052 if (lda_rhs != n_R) { 4053 for (i=0;i<n_vertices;i++) { 4054 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4055 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4056 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4057 } 4058 } else { 4059 for (i=0;i<n_vertices;i++) { 4060 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4061 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4062 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4063 } 4064 } 4065 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4066 } 4067 } else { 4068 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4069 for (i=0;i<n_vertices;i++) { 4070 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4071 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4072 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4073 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4074 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4075 } 4076 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4077 } 4078 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4079 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4080 /* S_VV and S_CV */ 4081 if (n_constraints) { 4082 Mat B; 4083 4084 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4085 for (i=0;i<n_vertices;i++) { 4086 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4087 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4088 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4089 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4091 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4092 } 4093 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4094 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4095 ierr = MatDestroy(&B);CHKERRQ(ierr); 4096 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4097 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4098 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4099 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4100 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4101 ierr = MatDestroy(&B);CHKERRQ(ierr); 4102 } 4103 if (lda_rhs != n_R) { 4104 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4105 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4106 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4107 } 4108 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4109 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4110 if (need_benign_correction) { 4111 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4112 PetscScalar *marr,*sums; 4113 4114 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4115 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4116 for (i=0;i<reuse_solver->benign_n;i++) { 4117 const PetscScalar *vals; 4118 const PetscInt *idxs,*idxs_zero; 4119 PetscInt n,j,nz; 4120 4121 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4122 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4123 for (j=0;j<n_vertices;j++) { 4124 PetscInt k; 4125 sums[j] = 0.; 4126 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4127 } 4128 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4129 for (j=0;j<n;j++) { 4130 PetscScalar val = vals[j]; 4131 PetscInt k; 4132 for (k=0;k<n_vertices;k++) { 4133 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4134 } 4135 } 4136 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4137 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4138 } 4139 ierr = PetscFree(sums);CHKERRQ(ierr); 4140 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4141 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4142 } 4143 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4144 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4145 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4146 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4147 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4148 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4149 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4150 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4151 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4152 } else { 4153 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4154 } 4155 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4156 4157 /* coarse basis functions */ 4158 for (i=0;i<n_vertices;i++) { 4159 PetscScalar *y; 4160 4161 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4162 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4163 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4164 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4165 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4166 y[n_B*i+idx_V_B[i]] = 1.0; 4167 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4168 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4169 4170 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4171 PetscInt j; 4172 4173 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4174 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4175 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4176 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4177 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4178 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4179 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4180 } 4181 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4182 } 4183 /* if n_R == 0 the object is not destroyed */ 4184 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4185 } 4186 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4187 4188 if (n_constraints) { 4189 Mat B; 4190 4191 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4192 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4193 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4194 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4195 if (n_vertices) { 4196 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4197 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4198 } else { 4199 Mat S_VCt; 4200 4201 if (lda_rhs != n_R) { 4202 ierr = MatDestroy(&B);CHKERRQ(ierr); 4203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4204 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4205 } 4206 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4207 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4208 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4209 } 4210 } 4211 ierr = MatDestroy(&B);CHKERRQ(ierr); 4212 /* coarse basis functions */ 4213 for (i=0;i<n_constraints;i++) { 4214 PetscScalar *y; 4215 4216 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4217 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4218 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4219 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4220 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4221 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4222 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4223 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4224 PetscInt j; 4225 4226 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4227 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4228 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4230 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4231 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4232 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4233 } 4234 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4235 } 4236 } 4237 if (n_constraints) { 4238 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4239 } 4240 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4241 4242 /* coarse matrix entries relative to B_0 */ 4243 if (pcbddc->benign_n) { 4244 Mat B0_B,B0_BPHI; 4245 IS is_dummy; 4246 PetscScalar *data; 4247 PetscInt j; 4248 4249 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4250 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4251 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4252 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4253 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4254 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4255 for (j=0;j<pcbddc->benign_n;j++) { 4256 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4257 for (i=0;i<pcbddc->local_primal_size;i++) { 4258 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4259 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4260 } 4261 } 4262 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4263 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4264 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4265 } 4266 4267 /* compute other basis functions for non-symmetric problems */ 4268 if (!pcbddc->symmetric_primal) { 4269 Mat B_V=NULL,B_C=NULL; 4270 PetscScalar *marray; 4271 4272 if (n_constraints) { 4273 Mat S_CCT,C_CRT; 4274 4275 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4276 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4277 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4278 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4279 if (n_vertices) { 4280 Mat S_VCT; 4281 4282 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4283 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4284 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4285 } 4286 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4287 } else { 4288 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4289 } 4290 if (n_vertices && n_R) { 4291 PetscScalar *av,*marray; 4292 const PetscInt *xadj,*adjncy; 4293 PetscInt n; 4294 PetscBool flg_row; 4295 4296 /* B_V = B_V - A_VR^T */ 4297 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4298 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4299 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4300 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4301 for (i=0;i<n;i++) { 4302 PetscInt j; 4303 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4304 } 4305 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4306 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4307 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4308 } 4309 4310 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4311 if (n_vertices) { 4312 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4313 for (i=0;i<n_vertices;i++) { 4314 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4315 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4316 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4317 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4318 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4319 } 4320 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4321 } 4322 if (B_C) { 4323 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4324 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4325 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4326 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4327 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4328 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4329 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4330 } 4331 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4332 } 4333 /* coarse basis functions */ 4334 for (i=0;i<pcbddc->local_primal_size;i++) { 4335 PetscScalar *y; 4336 4337 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4338 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4339 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4340 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4341 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4342 if (i<n_vertices) { 4343 y[n_B*i+idx_V_B[i]] = 1.0; 4344 } 4345 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4346 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4347 4348 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4349 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4350 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4351 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4352 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4353 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4354 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4355 } 4356 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4357 } 4358 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4359 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4360 } 4361 4362 /* free memory */ 4363 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4364 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4365 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4366 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4367 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4368 ierr = PetscFree(work);CHKERRQ(ierr); 4369 if (n_vertices) { 4370 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4371 } 4372 if (n_constraints) { 4373 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4374 } 4375 /* Checking coarse_sub_mat and coarse basis functios */ 4376 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4377 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4378 if (pcbddc->dbg_flag) { 4379 Mat coarse_sub_mat; 4380 Mat AUXMAT,TM1,TM2,TM3,TM4; 4381 Mat coarse_phi_D,coarse_phi_B; 4382 Mat coarse_psi_D,coarse_psi_B; 4383 Mat A_II,A_BB,A_IB,A_BI; 4384 Mat C_B,CPHI; 4385 IS is_dummy; 4386 Vec mones; 4387 MatType checkmattype=MATSEQAIJ; 4388 PetscReal real_value; 4389 4390 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4391 Mat A; 4392 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4393 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4394 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4395 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4396 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4397 ierr = MatDestroy(&A);CHKERRQ(ierr); 4398 } else { 4399 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4400 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4401 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4402 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4403 } 4404 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4405 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4406 if (!pcbddc->symmetric_primal) { 4407 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4408 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4409 } 4410 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4411 4412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4413 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4414 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4415 if (!pcbddc->symmetric_primal) { 4416 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4417 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4418 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4419 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4420 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4421 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4422 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4423 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4424 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4425 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4426 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4427 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4428 } else { 4429 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4430 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4431 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4432 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4433 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4434 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4435 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4436 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4437 } 4438 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4439 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4440 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4441 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4442 if (pcbddc->benign_n) { 4443 Mat B0_B,B0_BPHI; 4444 PetscScalar *data,*data2; 4445 PetscInt j; 4446 4447 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4448 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4449 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4450 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4451 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4452 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4453 for (j=0;j<pcbddc->benign_n;j++) { 4454 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4455 for (i=0;i<pcbddc->local_primal_size;i++) { 4456 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4457 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4458 } 4459 } 4460 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4461 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4462 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4463 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4464 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4465 } 4466 #if 0 4467 { 4468 PetscViewer viewer; 4469 char filename[256]; 4470 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4471 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4472 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4473 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4474 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4475 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4476 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4477 if (pcbddc->coarse_phi_B) { 4478 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4479 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4480 } 4481 if (pcbddc->coarse_phi_D) { 4482 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4483 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4484 } 4485 if (pcbddc->coarse_psi_B) { 4486 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4487 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4488 } 4489 if (pcbddc->coarse_psi_D) { 4490 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4491 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4492 } 4493 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4494 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4495 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4496 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4497 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4498 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4499 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4500 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4501 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4502 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4503 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4504 } 4505 #endif 4506 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4507 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4508 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4509 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4510 4511 /* check constraints */ 4512 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4513 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4514 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4515 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4516 } else { 4517 PetscScalar *data; 4518 Mat tmat; 4519 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4520 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4521 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4522 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4523 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4524 } 4525 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4526 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4527 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4528 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4529 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4530 if (!pcbddc->symmetric_primal) { 4531 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4532 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4533 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4534 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4535 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4536 } 4537 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4538 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4539 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4540 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4541 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4542 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4543 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4544 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4545 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4546 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4547 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4548 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4549 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4550 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4551 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4552 if (!pcbddc->symmetric_primal) { 4553 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4554 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4555 } 4556 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4557 } 4558 /* get back data */ 4559 *coarse_submat_vals_n = coarse_submat_vals; 4560 PetscFunctionReturn(0); 4561 } 4562 4563 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4564 { 4565 Mat *work_mat; 4566 IS isrow_s,iscol_s; 4567 PetscBool rsorted,csorted; 4568 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4569 PetscErrorCode ierr; 4570 4571 PetscFunctionBegin; 4572 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4573 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4574 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4575 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4576 4577 if (!rsorted) { 4578 const PetscInt *idxs; 4579 PetscInt *idxs_sorted,i; 4580 4581 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4582 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4583 for (i=0;i<rsize;i++) { 4584 idxs_perm_r[i] = i; 4585 } 4586 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4587 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4588 for (i=0;i<rsize;i++) { 4589 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4590 } 4591 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4592 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4593 } else { 4594 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4595 isrow_s = isrow; 4596 } 4597 4598 if (!csorted) { 4599 if (isrow == iscol) { 4600 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4601 iscol_s = isrow_s; 4602 } else { 4603 const PetscInt *idxs; 4604 PetscInt *idxs_sorted,i; 4605 4606 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4607 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4608 for (i=0;i<csize;i++) { 4609 idxs_perm_c[i] = i; 4610 } 4611 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4612 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4613 for (i=0;i<csize;i++) { 4614 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4615 } 4616 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4617 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4618 } 4619 } else { 4620 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4621 iscol_s = iscol; 4622 } 4623 4624 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4625 4626 if (!rsorted || !csorted) { 4627 Mat new_mat; 4628 IS is_perm_r,is_perm_c; 4629 4630 if (!rsorted) { 4631 PetscInt *idxs_r,i; 4632 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4633 for (i=0;i<rsize;i++) { 4634 idxs_r[idxs_perm_r[i]] = i; 4635 } 4636 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4637 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4638 } else { 4639 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4640 } 4641 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4642 4643 if (!csorted) { 4644 if (isrow_s == iscol_s) { 4645 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4646 is_perm_c = is_perm_r; 4647 } else { 4648 PetscInt *idxs_c,i; 4649 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4650 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4651 for (i=0;i<csize;i++) { 4652 idxs_c[idxs_perm_c[i]] = i; 4653 } 4654 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4655 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4656 } 4657 } else { 4658 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4659 } 4660 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4661 4662 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4663 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4664 work_mat[0] = new_mat; 4665 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4666 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4667 } 4668 4669 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4670 *B = work_mat[0]; 4671 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4672 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4673 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4674 PetscFunctionReturn(0); 4675 } 4676 4677 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4678 { 4679 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4680 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4681 Mat new_mat,lA; 4682 IS is_local,is_global; 4683 PetscInt local_size; 4684 PetscBool isseqaij; 4685 PetscErrorCode ierr; 4686 4687 PetscFunctionBegin; 4688 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4689 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4690 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4691 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4692 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4693 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4694 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4695 4696 /* check */ 4697 if (pcbddc->dbg_flag) { 4698 Vec x,x_change; 4699 PetscReal error; 4700 4701 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4702 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4703 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4704 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4705 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4706 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4707 if (!pcbddc->change_interior) { 4708 const PetscScalar *x,*y,*v; 4709 PetscReal lerror = 0.; 4710 PetscInt i; 4711 4712 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4713 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4714 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4715 for (i=0;i<local_size;i++) 4716 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4717 lerror = PetscAbsScalar(x[i]-y[i]); 4718 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4719 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4720 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4721 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4722 if (error > PETSC_SMALL) { 4723 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4724 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4725 } else { 4726 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4727 } 4728 } 4729 } 4730 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4731 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4732 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4733 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4734 if (error > PETSC_SMALL) { 4735 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4736 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4737 } else { 4738 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4739 } 4740 } 4741 ierr = VecDestroy(&x);CHKERRQ(ierr); 4742 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4743 } 4744 4745 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4746 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4747 4748 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4749 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4750 if (isseqaij) { 4751 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4752 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4753 if (lA) { 4754 Mat work; 4755 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4756 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4757 ierr = MatDestroy(&work);CHKERRQ(ierr); 4758 } 4759 } else { 4760 Mat work_mat; 4761 4762 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4763 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4764 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4765 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4766 if (lA) { 4767 Mat work; 4768 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4769 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4770 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4771 ierr = MatDestroy(&work);CHKERRQ(ierr); 4772 } 4773 } 4774 if (matis->A->symmetric_set) { 4775 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4776 #if !defined(PETSC_USE_COMPLEX) 4777 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4778 #endif 4779 } 4780 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4781 PetscFunctionReturn(0); 4782 } 4783 4784 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4785 { 4786 PC_IS* pcis = (PC_IS*)(pc->data); 4787 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4788 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4789 PetscInt *idx_R_local=NULL; 4790 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4791 PetscInt vbs,bs; 4792 PetscBT bitmask=NULL; 4793 PetscErrorCode ierr; 4794 4795 PetscFunctionBegin; 4796 /* 4797 No need to setup local scatters if 4798 - primal space is unchanged 4799 AND 4800 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4801 AND 4802 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4803 */ 4804 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4805 PetscFunctionReturn(0); 4806 } 4807 /* destroy old objects */ 4808 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4809 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4810 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4811 /* Set Non-overlapping dimensions */ 4812 n_B = pcis->n_B; 4813 n_D = pcis->n - n_B; 4814 n_vertices = pcbddc->n_vertices; 4815 4816 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4817 4818 /* create auxiliary bitmask and allocate workspace */ 4819 if (!sub_schurs || !sub_schurs->reuse_solver) { 4820 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4821 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4822 for (i=0;i<n_vertices;i++) { 4823 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4824 } 4825 4826 for (i=0, n_R=0; i<pcis->n; i++) { 4827 if (!PetscBTLookup(bitmask,i)) { 4828 idx_R_local[n_R++] = i; 4829 } 4830 } 4831 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4832 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4833 4834 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4835 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4836 } 4837 4838 /* Block code */ 4839 vbs = 1; 4840 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4841 if (bs>1 && !(n_vertices%bs)) { 4842 PetscBool is_blocked = PETSC_TRUE; 4843 PetscInt *vary; 4844 if (!sub_schurs || !sub_schurs->reuse_solver) { 4845 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4846 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4847 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4848 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4849 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4850 for (i=0; i<pcis->n/bs; i++) { 4851 if (vary[i]!=0 && vary[i]!=bs) { 4852 is_blocked = PETSC_FALSE; 4853 break; 4854 } 4855 } 4856 ierr = PetscFree(vary);CHKERRQ(ierr); 4857 } else { 4858 /* Verify directly the R set */ 4859 for (i=0; i<n_R/bs; i++) { 4860 PetscInt j,node=idx_R_local[bs*i]; 4861 for (j=1; j<bs; j++) { 4862 if (node != idx_R_local[bs*i+j]-j) { 4863 is_blocked = PETSC_FALSE; 4864 break; 4865 } 4866 } 4867 } 4868 } 4869 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4870 vbs = bs; 4871 for (i=0;i<n_R/vbs;i++) { 4872 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4873 } 4874 } 4875 } 4876 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4877 if (sub_schurs && sub_schurs->reuse_solver) { 4878 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4879 4880 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4881 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4882 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4883 reuse_solver->is_R = pcbddc->is_R_local; 4884 } else { 4885 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4886 } 4887 4888 /* print some info if requested */ 4889 if (pcbddc->dbg_flag) { 4890 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4892 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4896 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4897 } 4898 4899 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4900 if (!sub_schurs || !sub_schurs->reuse_solver) { 4901 IS is_aux1,is_aux2; 4902 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4903 4904 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4905 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4906 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4907 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4908 for (i=0; i<n_D; i++) { 4909 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4910 } 4911 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4912 for (i=0, j=0; i<n_R; i++) { 4913 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4914 aux_array1[j++] = i; 4915 } 4916 } 4917 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4918 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4919 for (i=0, j=0; i<n_B; i++) { 4920 if (!PetscBTLookup(bitmask,is_indices[i])) { 4921 aux_array2[j++] = i; 4922 } 4923 } 4924 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4925 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4926 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4927 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4928 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4929 4930 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4931 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4932 for (i=0, j=0; i<n_R; i++) { 4933 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4934 aux_array1[j++] = i; 4935 } 4936 } 4937 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4938 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4939 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4940 } 4941 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4942 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4943 } else { 4944 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4945 IS tis; 4946 PetscInt schur_size; 4947 4948 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4949 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4950 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4951 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4952 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4953 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4954 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4955 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4956 } 4957 } 4958 PetscFunctionReturn(0); 4959 } 4960 4961 4962 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4963 { 4964 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4965 PC_IS *pcis = (PC_IS*)pc->data; 4966 PC pc_temp; 4967 Mat A_RR; 4968 MatReuse reuse; 4969 PetscScalar m_one = -1.0; 4970 PetscReal value; 4971 PetscInt n_D,n_R; 4972 PetscBool check_corr,issbaij; 4973 PetscErrorCode ierr; 4974 /* prefixes stuff */ 4975 char dir_prefix[256],neu_prefix[256],str_level[16]; 4976 size_t len; 4977 4978 PetscFunctionBegin; 4979 4980 /* compute prefixes */ 4981 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4982 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4983 if (!pcbddc->current_level) { 4984 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4985 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4986 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4987 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4988 } else { 4989 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4990 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4991 len -= 15; /* remove "pc_bddc_coarse_" */ 4992 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4993 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4994 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4995 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4996 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4997 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4998 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4999 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5000 } 5001 5002 /* DIRICHLET PROBLEM */ 5003 if (dirichlet) { 5004 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5005 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5006 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5007 if (pcbddc->dbg_flag) { 5008 Mat A_IIn; 5009 5010 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5011 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5012 pcis->A_II = A_IIn; 5013 } 5014 } 5015 if (pcbddc->local_mat->symmetric_set) { 5016 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5017 } 5018 /* Matrix for Dirichlet problem is pcis->A_II */ 5019 n_D = pcis->n - pcis->n_B; 5020 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5021 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5022 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5023 /* default */ 5024 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5025 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5026 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5027 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5028 if (issbaij) { 5029 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5030 } else { 5031 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5032 } 5033 /* Allow user's customization */ 5034 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5035 } 5036 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5037 if (sub_schurs && sub_schurs->reuse_solver) { 5038 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5039 5040 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5041 } 5042 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5043 if (!n_D) { 5044 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5045 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5046 } 5047 /* Set Up KSP for Dirichlet problem of BDDC */ 5048 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5049 /* set ksp_D into pcis data */ 5050 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5051 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5052 pcis->ksp_D = pcbddc->ksp_D; 5053 } 5054 5055 /* NEUMANN PROBLEM */ 5056 A_RR = 0; 5057 if (neumann) { 5058 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5059 PetscInt ibs,mbs; 5060 PetscBool issbaij, reuse_neumann_solver; 5061 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5062 5063 reuse_neumann_solver = PETSC_FALSE; 5064 if (sub_schurs && sub_schurs->reuse_solver) { 5065 IS iP; 5066 5067 reuse_neumann_solver = PETSC_TRUE; 5068 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5069 if (iP) reuse_neumann_solver = PETSC_FALSE; 5070 } 5071 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5072 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5073 if (pcbddc->ksp_R) { /* already created ksp */ 5074 PetscInt nn_R; 5075 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5076 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5077 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5078 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5079 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5080 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5081 reuse = MAT_INITIAL_MATRIX; 5082 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5083 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5084 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5085 reuse = MAT_INITIAL_MATRIX; 5086 } else { /* safe to reuse the matrix */ 5087 reuse = MAT_REUSE_MATRIX; 5088 } 5089 } 5090 /* last check */ 5091 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5092 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5093 reuse = MAT_INITIAL_MATRIX; 5094 } 5095 } else { /* first time, so we need to create the matrix */ 5096 reuse = MAT_INITIAL_MATRIX; 5097 } 5098 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5099 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5100 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5101 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5102 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5103 if (matis->A == pcbddc->local_mat) { 5104 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5105 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5106 } else { 5107 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5108 } 5109 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5110 if (matis->A == pcbddc->local_mat) { 5111 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5112 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5113 } else { 5114 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5115 } 5116 } 5117 /* extract A_RR */ 5118 if (reuse_neumann_solver) { 5119 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5120 5121 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5122 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5123 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5124 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5125 } else { 5126 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5127 } 5128 } else { 5129 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5130 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5131 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5132 } 5133 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5134 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5135 } 5136 if (pcbddc->local_mat->symmetric_set) { 5137 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5138 } 5139 if (!pcbddc->ksp_R) { /* create object if not present */ 5140 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5141 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5142 /* default */ 5143 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5144 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5145 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5146 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5147 if (issbaij) { 5148 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5149 } else { 5150 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5151 } 5152 /* Allow user's customization */ 5153 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5154 } 5155 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5156 if (!n_R) { 5157 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5158 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5159 } 5160 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5161 /* Reuse solver if it is present */ 5162 if (reuse_neumann_solver) { 5163 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5164 5165 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5166 } 5167 /* Set Up KSP for Neumann problem of BDDC */ 5168 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5169 } 5170 5171 if (pcbddc->dbg_flag) { 5172 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5173 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5174 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5175 } 5176 5177 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5178 check_corr = PETSC_FALSE; 5179 if (pcbddc->NullSpace_corr[0]) { 5180 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5181 } 5182 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5183 check_corr = PETSC_TRUE; 5184 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5185 } 5186 if (neumann && pcbddc->NullSpace_corr[2]) { 5187 check_corr = PETSC_TRUE; 5188 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5189 } 5190 /* check Dirichlet and Neumann solvers */ 5191 if (pcbddc->dbg_flag) { 5192 if (dirichlet) { /* Dirichlet */ 5193 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5194 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5195 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5196 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5197 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5198 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5199 if (check_corr) { 5200 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5201 } 5202 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5203 } 5204 if (neumann) { /* Neumann */ 5205 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5206 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5207 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5208 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5209 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5210 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5211 if (check_corr) { 5212 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5213 } 5214 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5215 } 5216 } 5217 /* free Neumann problem's matrix */ 5218 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5219 PetscFunctionReturn(0); 5220 } 5221 5222 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5223 { 5224 PetscErrorCode ierr; 5225 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5226 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5227 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5228 5229 PetscFunctionBegin; 5230 if (!reuse_solver) { 5231 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5232 } 5233 if (!pcbddc->switch_static) { 5234 if (applytranspose && pcbddc->local_auxmat1) { 5235 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5236 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5237 } 5238 if (!reuse_solver) { 5239 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5241 } else { 5242 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5243 5244 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5245 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5246 } 5247 } else { 5248 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5249 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5250 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5251 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5252 if (applytranspose && pcbddc->local_auxmat1) { 5253 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5254 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5255 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5256 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5257 } 5258 } 5259 if (!reuse_solver || pcbddc->switch_static) { 5260 if (applytranspose) { 5261 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5262 } else { 5263 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5264 } 5265 } else { 5266 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5267 5268 if (applytranspose) { 5269 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5270 } else { 5271 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5272 } 5273 } 5274 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5275 if (!pcbddc->switch_static) { 5276 if (!reuse_solver) { 5277 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5278 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5279 } else { 5280 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5281 5282 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5283 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5284 } 5285 if (!applytranspose && pcbddc->local_auxmat1) { 5286 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5287 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5288 } 5289 } else { 5290 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5291 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5292 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5293 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5294 if (!applytranspose && pcbddc->local_auxmat1) { 5295 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5296 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5297 } 5298 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5299 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5300 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5301 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5302 } 5303 PetscFunctionReturn(0); 5304 } 5305 5306 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5307 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5308 { 5309 PetscErrorCode ierr; 5310 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5311 PC_IS* pcis = (PC_IS*) (pc->data); 5312 const PetscScalar zero = 0.0; 5313 5314 PetscFunctionBegin; 5315 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5316 if (!pcbddc->benign_apply_coarse_only) { 5317 if (applytranspose) { 5318 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5319 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5320 } else { 5321 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5322 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5323 } 5324 } else { 5325 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5326 } 5327 5328 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5329 if (pcbddc->benign_n) { 5330 PetscScalar *array; 5331 PetscInt j; 5332 5333 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5334 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5335 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5336 } 5337 5338 /* start communications from local primal nodes to rhs of coarse solver */ 5339 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5340 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5341 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5342 5343 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5344 if (pcbddc->coarse_ksp) { 5345 Mat coarse_mat; 5346 Vec rhs,sol; 5347 MatNullSpace nullsp; 5348 PetscBool isbddc = PETSC_FALSE; 5349 5350 if (pcbddc->benign_have_null) { 5351 PC coarse_pc; 5352 5353 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5354 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5355 /* we need to propagate to coarser levels the need for a possible benign correction */ 5356 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5357 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5358 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5359 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5360 } 5361 } 5362 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5363 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5364 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5365 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5366 if (nullsp) { 5367 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5368 } 5369 if (applytranspose) { 5370 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5371 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5372 } else { 5373 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5374 PC coarse_pc; 5375 5376 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5377 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5378 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5379 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5380 } else { 5381 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5382 } 5383 } 5384 /* we don't need the benign correction at coarser levels anymore */ 5385 if (pcbddc->benign_have_null && isbddc) { 5386 PC coarse_pc; 5387 PC_BDDC* coarsepcbddc; 5388 5389 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5390 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5391 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5392 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5393 } 5394 if (nullsp) { 5395 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5396 } 5397 } 5398 5399 /* Local solution on R nodes */ 5400 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5401 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5402 } 5403 /* communications from coarse sol to local primal nodes */ 5404 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5405 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5406 5407 /* Sum contributions from the two levels */ 5408 if (!pcbddc->benign_apply_coarse_only) { 5409 if (applytranspose) { 5410 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5411 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5412 } else { 5413 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5414 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5415 } 5416 /* store p0 */ 5417 if (pcbddc->benign_n) { 5418 PetscScalar *array; 5419 PetscInt j; 5420 5421 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5422 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5423 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5424 } 5425 } else { /* expand the coarse solution */ 5426 if (applytranspose) { 5427 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5428 } else { 5429 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5430 } 5431 } 5432 PetscFunctionReturn(0); 5433 } 5434 5435 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5436 { 5437 PetscErrorCode ierr; 5438 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5439 PetscScalar *array; 5440 Vec from,to; 5441 5442 PetscFunctionBegin; 5443 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5444 from = pcbddc->coarse_vec; 5445 to = pcbddc->vec1_P; 5446 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5447 Vec tvec; 5448 5449 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5450 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5451 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5452 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5453 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5454 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5455 } 5456 } else { /* from local to global -> put data in coarse right hand side */ 5457 from = pcbddc->vec1_P; 5458 to = pcbddc->coarse_vec; 5459 } 5460 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5461 PetscFunctionReturn(0); 5462 } 5463 5464 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5465 { 5466 PetscErrorCode ierr; 5467 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5468 PetscScalar *array; 5469 Vec from,to; 5470 5471 PetscFunctionBegin; 5472 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5473 from = pcbddc->coarse_vec; 5474 to = pcbddc->vec1_P; 5475 } else { /* from local to global -> put data in coarse right hand side */ 5476 from = pcbddc->vec1_P; 5477 to = pcbddc->coarse_vec; 5478 } 5479 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5480 if (smode == SCATTER_FORWARD) { 5481 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5482 Vec tvec; 5483 5484 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5485 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5486 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5487 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5488 } 5489 } else { 5490 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5491 ierr = VecResetArray(from);CHKERRQ(ierr); 5492 } 5493 } 5494 PetscFunctionReturn(0); 5495 } 5496 5497 /* uncomment for testing purposes */ 5498 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5499 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5500 { 5501 PetscErrorCode ierr; 5502 PC_IS* pcis = (PC_IS*)(pc->data); 5503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5504 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5505 /* one and zero */ 5506 PetscScalar one=1.0,zero=0.0; 5507 /* space to store constraints and their local indices */ 5508 PetscScalar *constraints_data; 5509 PetscInt *constraints_idxs,*constraints_idxs_B; 5510 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5511 PetscInt *constraints_n; 5512 /* iterators */ 5513 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5514 /* BLAS integers */ 5515 PetscBLASInt lwork,lierr; 5516 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5517 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5518 /* reuse */ 5519 PetscInt olocal_primal_size,olocal_primal_size_cc; 5520 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5521 /* change of basis */ 5522 PetscBool qr_needed; 5523 PetscBT change_basis,qr_needed_idx; 5524 /* auxiliary stuff */ 5525 PetscInt *nnz,*is_indices; 5526 PetscInt ncc; 5527 /* some quantities */ 5528 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5529 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5530 5531 PetscFunctionBegin; 5532 /* Destroy Mat objects computed previously */ 5533 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5534 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5535 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5536 /* save info on constraints from previous setup (if any) */ 5537 olocal_primal_size = pcbddc->local_primal_size; 5538 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5539 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5540 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5541 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5542 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5543 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5544 5545 if (!pcbddc->adaptive_selection) { 5546 IS ISForVertices,*ISForFaces,*ISForEdges; 5547 MatNullSpace nearnullsp; 5548 const Vec *nearnullvecs; 5549 Vec *localnearnullsp; 5550 PetscScalar *array; 5551 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5552 PetscBool nnsp_has_cnst; 5553 /* LAPACK working arrays for SVD or POD */ 5554 PetscBool skip_lapack,boolforchange; 5555 PetscScalar *work; 5556 PetscReal *singular_vals; 5557 #if defined(PETSC_USE_COMPLEX) 5558 PetscReal *rwork; 5559 #endif 5560 #if defined(PETSC_MISSING_LAPACK_GESVD) 5561 PetscScalar *temp_basis,*correlation_mat; 5562 #else 5563 PetscBLASInt dummy_int=1; 5564 PetscScalar dummy_scalar=1.; 5565 #endif 5566 5567 /* Get index sets for faces, edges and vertices from graph */ 5568 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5569 /* print some info */ 5570 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5571 PetscInt nv; 5572 5573 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5574 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5575 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5581 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5582 } 5583 5584 /* free unneeded index sets */ 5585 if (!pcbddc->use_vertices) { 5586 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5587 } 5588 if (!pcbddc->use_edges) { 5589 for (i=0;i<n_ISForEdges;i++) { 5590 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5591 } 5592 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5593 n_ISForEdges = 0; 5594 } 5595 if (!pcbddc->use_faces) { 5596 for (i=0;i<n_ISForFaces;i++) { 5597 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5598 } 5599 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5600 n_ISForFaces = 0; 5601 } 5602 5603 /* check if near null space is attached to global mat */ 5604 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5605 if (nearnullsp) { 5606 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5607 /* remove any stored info */ 5608 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5609 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5610 /* store information for BDDC solver reuse */ 5611 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5612 pcbddc->onearnullspace = nearnullsp; 5613 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5614 for (i=0;i<nnsp_size;i++) { 5615 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5616 } 5617 } else { /* if near null space is not provided BDDC uses constants by default */ 5618 nnsp_size = 0; 5619 nnsp_has_cnst = PETSC_TRUE; 5620 } 5621 /* get max number of constraints on a single cc */ 5622 max_constraints = nnsp_size; 5623 if (nnsp_has_cnst) max_constraints++; 5624 5625 /* 5626 Evaluate maximum storage size needed by the procedure 5627 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5628 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5629 There can be multiple constraints per connected component 5630 */ 5631 n_vertices = 0; 5632 if (ISForVertices) { 5633 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5634 } 5635 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5636 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5637 5638 total_counts = n_ISForFaces+n_ISForEdges; 5639 total_counts *= max_constraints; 5640 total_counts += n_vertices; 5641 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5642 5643 total_counts = 0; 5644 max_size_of_constraint = 0; 5645 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5646 IS used_is; 5647 if (i<n_ISForEdges) { 5648 used_is = ISForEdges[i]; 5649 } else { 5650 used_is = ISForFaces[i-n_ISForEdges]; 5651 } 5652 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5653 total_counts += j; 5654 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5655 } 5656 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5657 5658 /* get local part of global near null space vectors */ 5659 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5660 for (k=0;k<nnsp_size;k++) { 5661 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5662 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5663 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5664 } 5665 5666 /* whether or not to skip lapack calls */ 5667 skip_lapack = PETSC_TRUE; 5668 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5669 5670 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5671 if (!skip_lapack) { 5672 PetscScalar temp_work; 5673 5674 #if defined(PETSC_MISSING_LAPACK_GESVD) 5675 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5676 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5677 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5678 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5679 #if defined(PETSC_USE_COMPLEX) 5680 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5681 #endif 5682 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5683 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5684 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5685 lwork = -1; 5686 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5687 #if !defined(PETSC_USE_COMPLEX) 5688 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5689 #else 5690 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5691 #endif 5692 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5693 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5694 #else /* on missing GESVD */ 5695 /* SVD */ 5696 PetscInt max_n,min_n; 5697 max_n = max_size_of_constraint; 5698 min_n = max_constraints; 5699 if (max_size_of_constraint < max_constraints) { 5700 min_n = max_size_of_constraint; 5701 max_n = max_constraints; 5702 } 5703 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5704 #if defined(PETSC_USE_COMPLEX) 5705 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5706 #endif 5707 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5708 lwork = -1; 5709 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5710 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5711 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5712 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5713 #if !defined(PETSC_USE_COMPLEX) 5714 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5715 #else 5716 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5717 #endif 5718 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5719 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5720 #endif /* on missing GESVD */ 5721 /* Allocate optimal workspace */ 5722 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5723 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5724 } 5725 /* Now we can loop on constraining sets */ 5726 total_counts = 0; 5727 constraints_idxs_ptr[0] = 0; 5728 constraints_data_ptr[0] = 0; 5729 /* vertices */ 5730 if (n_vertices) { 5731 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5732 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5733 for (i=0;i<n_vertices;i++) { 5734 constraints_n[total_counts] = 1; 5735 constraints_data[total_counts] = 1.0; 5736 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5737 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5738 total_counts++; 5739 } 5740 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5741 n_vertices = total_counts; 5742 } 5743 5744 /* edges and faces */ 5745 total_counts_cc = total_counts; 5746 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5747 IS used_is; 5748 PetscBool idxs_copied = PETSC_FALSE; 5749 5750 if (ncc<n_ISForEdges) { 5751 used_is = ISForEdges[ncc]; 5752 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5753 } else { 5754 used_is = ISForFaces[ncc-n_ISForEdges]; 5755 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5756 } 5757 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5758 5759 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5760 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5761 /* change of basis should not be performed on local periodic nodes */ 5762 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5763 if (nnsp_has_cnst) { 5764 PetscScalar quad_value; 5765 5766 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5767 idxs_copied = PETSC_TRUE; 5768 5769 if (!pcbddc->use_nnsp_true) { 5770 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5771 } else { 5772 quad_value = 1.0; 5773 } 5774 for (j=0;j<size_of_constraint;j++) { 5775 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5776 } 5777 temp_constraints++; 5778 total_counts++; 5779 } 5780 for (k=0;k<nnsp_size;k++) { 5781 PetscReal real_value; 5782 PetscScalar *ptr_to_data; 5783 5784 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5785 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5786 for (j=0;j<size_of_constraint;j++) { 5787 ptr_to_data[j] = array[is_indices[j]]; 5788 } 5789 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5790 /* check if array is null on the connected component */ 5791 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5792 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5793 if (real_value > 0.0) { /* keep indices and values */ 5794 temp_constraints++; 5795 total_counts++; 5796 if (!idxs_copied) { 5797 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5798 idxs_copied = PETSC_TRUE; 5799 } 5800 } 5801 } 5802 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5803 valid_constraints = temp_constraints; 5804 if (!pcbddc->use_nnsp_true && temp_constraints) { 5805 if (temp_constraints == 1) { /* just normalize the constraint */ 5806 PetscScalar norm,*ptr_to_data; 5807 5808 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5809 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5810 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5811 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5812 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5813 } else { /* perform SVD */ 5814 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5815 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5816 5817 #if defined(PETSC_MISSING_LAPACK_GESVD) 5818 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5819 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5820 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5821 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5822 from that computed using LAPACKgesvd 5823 -> This is due to a different computation of eigenvectors in LAPACKheev 5824 -> The quality of the POD-computed basis will be the same */ 5825 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5826 /* Store upper triangular part of correlation matrix */ 5827 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5828 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5829 for (j=0;j<temp_constraints;j++) { 5830 for (k=0;k<j+1;k++) { 5831 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5832 } 5833 } 5834 /* compute eigenvalues and eigenvectors of correlation matrix */ 5835 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5836 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5837 #if !defined(PETSC_USE_COMPLEX) 5838 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5839 #else 5840 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5841 #endif 5842 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5843 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5844 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5845 j = 0; 5846 while (j < temp_constraints && singular_vals[j] < tol) j++; 5847 total_counts = total_counts-j; 5848 valid_constraints = temp_constraints-j; 5849 /* scale and copy POD basis into used quadrature memory */ 5850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5851 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5852 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5853 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5854 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5856 if (j<temp_constraints) { 5857 PetscInt ii; 5858 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5859 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5860 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5861 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5862 for (k=0;k<temp_constraints-j;k++) { 5863 for (ii=0;ii<size_of_constraint;ii++) { 5864 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5865 } 5866 } 5867 } 5868 #else /* on missing GESVD */ 5869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5870 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5872 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5873 #if !defined(PETSC_USE_COMPLEX) 5874 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5875 #else 5876 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5877 #endif 5878 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5879 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5880 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5881 k = temp_constraints; 5882 if (k > size_of_constraint) k = size_of_constraint; 5883 j = 0; 5884 while (j < k && singular_vals[k-j-1] < tol) j++; 5885 valid_constraints = k-j; 5886 total_counts = total_counts-temp_constraints+valid_constraints; 5887 #endif /* on missing GESVD */ 5888 } 5889 } 5890 /* update pointers information */ 5891 if (valid_constraints) { 5892 constraints_n[total_counts_cc] = valid_constraints; 5893 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5894 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5895 /* set change_of_basis flag */ 5896 if (boolforchange) { 5897 PetscBTSet(change_basis,total_counts_cc); 5898 } 5899 total_counts_cc++; 5900 } 5901 } 5902 /* free workspace */ 5903 if (!skip_lapack) { 5904 ierr = PetscFree(work);CHKERRQ(ierr); 5905 #if defined(PETSC_USE_COMPLEX) 5906 ierr = PetscFree(rwork);CHKERRQ(ierr); 5907 #endif 5908 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5909 #if defined(PETSC_MISSING_LAPACK_GESVD) 5910 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5911 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5912 #endif 5913 } 5914 for (k=0;k<nnsp_size;k++) { 5915 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5916 } 5917 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5918 /* free index sets of faces, edges and vertices */ 5919 for (i=0;i<n_ISForFaces;i++) { 5920 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5921 } 5922 if (n_ISForFaces) { 5923 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5924 } 5925 for (i=0;i<n_ISForEdges;i++) { 5926 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5927 } 5928 if (n_ISForEdges) { 5929 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5930 } 5931 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5932 } else { 5933 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5934 5935 total_counts = 0; 5936 n_vertices = 0; 5937 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5938 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5939 } 5940 max_constraints = 0; 5941 total_counts_cc = 0; 5942 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5943 total_counts += pcbddc->adaptive_constraints_n[i]; 5944 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5945 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5946 } 5947 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5948 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5949 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5950 constraints_data = pcbddc->adaptive_constraints_data; 5951 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5952 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5953 total_counts_cc = 0; 5954 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5955 if (pcbddc->adaptive_constraints_n[i]) { 5956 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5957 } 5958 } 5959 #if 0 5960 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5961 for (i=0;i<total_counts_cc;i++) { 5962 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5963 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5964 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5965 printf(" %d",constraints_idxs[j]); 5966 } 5967 printf("\n"); 5968 printf("number of cc: %d\n",constraints_n[i]); 5969 } 5970 for (i=0;i<n_vertices;i++) { 5971 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5972 } 5973 for (i=0;i<sub_schurs->n_subs;i++) { 5974 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5975 } 5976 #endif 5977 5978 max_size_of_constraint = 0; 5979 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5980 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5981 /* Change of basis */ 5982 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5983 if (pcbddc->use_change_of_basis) { 5984 for (i=0;i<sub_schurs->n_subs;i++) { 5985 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5986 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5987 } 5988 } 5989 } 5990 } 5991 pcbddc->local_primal_size = total_counts; 5992 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5993 5994 /* map constraints_idxs in boundary numbering */ 5995 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5996 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5997 5998 /* Create constraint matrix */ 5999 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6000 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6001 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6002 6003 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6004 /* determine if a QR strategy is needed for change of basis */ 6005 qr_needed = PETSC_FALSE; 6006 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6007 total_primal_vertices=0; 6008 pcbddc->local_primal_size_cc = 0; 6009 for (i=0;i<total_counts_cc;i++) { 6010 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6011 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6012 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6013 pcbddc->local_primal_size_cc += 1; 6014 } else if (PetscBTLookup(change_basis,i)) { 6015 for (k=0;k<constraints_n[i];k++) { 6016 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6017 } 6018 pcbddc->local_primal_size_cc += constraints_n[i]; 6019 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6020 PetscBTSet(qr_needed_idx,i); 6021 qr_needed = PETSC_TRUE; 6022 } 6023 } else { 6024 pcbddc->local_primal_size_cc += 1; 6025 } 6026 } 6027 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6028 pcbddc->n_vertices = total_primal_vertices; 6029 /* permute indices in order to have a sorted set of vertices */ 6030 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6031 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6032 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6033 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6034 6035 /* nonzero structure of constraint matrix */ 6036 /* and get reference dof for local constraints */ 6037 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6038 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6039 6040 j = total_primal_vertices; 6041 total_counts = total_primal_vertices; 6042 cum = total_primal_vertices; 6043 for (i=n_vertices;i<total_counts_cc;i++) { 6044 if (!PetscBTLookup(change_basis,i)) { 6045 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6046 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6047 cum++; 6048 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6049 for (k=0;k<constraints_n[i];k++) { 6050 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6051 nnz[j+k] = size_of_constraint; 6052 } 6053 j += constraints_n[i]; 6054 } 6055 } 6056 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6057 ierr = PetscFree(nnz);CHKERRQ(ierr); 6058 6059 /* set values in constraint matrix */ 6060 for (i=0;i<total_primal_vertices;i++) { 6061 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6062 } 6063 total_counts = total_primal_vertices; 6064 for (i=n_vertices;i<total_counts_cc;i++) { 6065 if (!PetscBTLookup(change_basis,i)) { 6066 PetscInt *cols; 6067 6068 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6069 cols = constraints_idxs+constraints_idxs_ptr[i]; 6070 for (k=0;k<constraints_n[i];k++) { 6071 PetscInt row = total_counts+k; 6072 PetscScalar *vals; 6073 6074 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6075 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6076 } 6077 total_counts += constraints_n[i]; 6078 } 6079 } 6080 /* assembling */ 6081 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6082 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6083 6084 /* 6085 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6086 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6087 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6088 */ 6089 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6090 if (pcbddc->use_change_of_basis) { 6091 /* dual and primal dofs on a single cc */ 6092 PetscInt dual_dofs,primal_dofs; 6093 /* working stuff for GEQRF */ 6094 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6095 PetscBLASInt lqr_work; 6096 /* working stuff for UNGQR */ 6097 PetscScalar *gqr_work,lgqr_work_t; 6098 PetscBLASInt lgqr_work; 6099 /* working stuff for TRTRS */ 6100 PetscScalar *trs_rhs; 6101 PetscBLASInt Blas_NRHS; 6102 /* pointers for values insertion into change of basis matrix */ 6103 PetscInt *start_rows,*start_cols; 6104 PetscScalar *start_vals; 6105 /* working stuff for values insertion */ 6106 PetscBT is_primal; 6107 PetscInt *aux_primal_numbering_B; 6108 /* matrix sizes */ 6109 PetscInt global_size,local_size; 6110 /* temporary change of basis */ 6111 Mat localChangeOfBasisMatrix; 6112 /* extra space for debugging */ 6113 PetscScalar *dbg_work; 6114 6115 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6116 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6117 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6118 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6119 /* nonzeros for local mat */ 6120 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6121 if (!pcbddc->benign_change || pcbddc->fake_change) { 6122 for (i=0;i<pcis->n;i++) nnz[i]=1; 6123 } else { 6124 const PetscInt *ii; 6125 PetscInt n; 6126 PetscBool flg_row; 6127 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6128 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6129 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6130 } 6131 for (i=n_vertices;i<total_counts_cc;i++) { 6132 if (PetscBTLookup(change_basis,i)) { 6133 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6134 if (PetscBTLookup(qr_needed_idx,i)) { 6135 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6136 } else { 6137 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6138 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6139 } 6140 } 6141 } 6142 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6143 ierr = PetscFree(nnz);CHKERRQ(ierr); 6144 /* Set interior change in the matrix */ 6145 if (!pcbddc->benign_change || pcbddc->fake_change) { 6146 for (i=0;i<pcis->n;i++) { 6147 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6148 } 6149 } else { 6150 const PetscInt *ii,*jj; 6151 PetscScalar *aa; 6152 PetscInt n; 6153 PetscBool flg_row; 6154 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6155 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6156 for (i=0;i<n;i++) { 6157 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6158 } 6159 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6160 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6161 } 6162 6163 if (pcbddc->dbg_flag) { 6164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6165 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6166 } 6167 6168 6169 /* Now we loop on the constraints which need a change of basis */ 6170 /* 6171 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6172 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6173 6174 Basic blocks of change of basis matrix T computed by 6175 6176 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6177 6178 | 1 0 ... 0 s_1/S | 6179 | 0 1 ... 0 s_2/S | 6180 | ... | 6181 | 0 ... 1 s_{n-1}/S | 6182 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6183 6184 with S = \sum_{i=1}^n s_i^2 6185 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6186 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6187 6188 - QR decomposition of constraints otherwise 6189 */ 6190 if (qr_needed) { 6191 /* space to store Q */ 6192 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6193 /* array to store scaling factors for reflectors */ 6194 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6195 /* first we issue queries for optimal work */ 6196 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6197 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6198 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6199 lqr_work = -1; 6200 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6201 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6202 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6203 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6204 lgqr_work = -1; 6205 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6206 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6207 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6208 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6209 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6210 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6211 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6212 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6213 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6214 /* array to store rhs and solution of triangular solver */ 6215 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6216 /* allocating workspace for check */ 6217 if (pcbddc->dbg_flag) { 6218 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6219 } 6220 } 6221 /* array to store whether a node is primal or not */ 6222 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6223 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6224 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6225 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6226 for (i=0;i<total_primal_vertices;i++) { 6227 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6228 } 6229 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6230 6231 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6232 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6233 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6234 if (PetscBTLookup(change_basis,total_counts)) { 6235 /* get constraint info */ 6236 primal_dofs = constraints_n[total_counts]; 6237 dual_dofs = size_of_constraint-primal_dofs; 6238 6239 if (pcbddc->dbg_flag) { 6240 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6241 } 6242 6243 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6244 6245 /* copy quadrature constraints for change of basis check */ 6246 if (pcbddc->dbg_flag) { 6247 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6248 } 6249 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6250 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6251 6252 /* compute QR decomposition of constraints */ 6253 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6254 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6255 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6256 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6257 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6258 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6259 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6260 6261 /* explictly compute R^-T */ 6262 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6263 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6264 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6265 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6267 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6268 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6269 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6270 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6271 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6272 6273 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6274 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6275 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6276 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6279 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6280 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6282 6283 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6284 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6285 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6287 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6288 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6289 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6290 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6291 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6292 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6293 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6294 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6295 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6296 6297 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6298 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6299 /* insert cols for primal dofs */ 6300 for (j=0;j<primal_dofs;j++) { 6301 start_vals = &qr_basis[j*size_of_constraint]; 6302 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6303 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6304 } 6305 /* insert cols for dual dofs */ 6306 for (j=0,k=0;j<dual_dofs;k++) { 6307 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6308 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6309 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6310 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6311 j++; 6312 } 6313 } 6314 6315 /* check change of basis */ 6316 if (pcbddc->dbg_flag) { 6317 PetscInt ii,jj; 6318 PetscBool valid_qr=PETSC_TRUE; 6319 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6320 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6321 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6322 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6323 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6324 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6325 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6326 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6327 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6328 for (jj=0;jj<size_of_constraint;jj++) { 6329 for (ii=0;ii<primal_dofs;ii++) { 6330 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6331 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6332 } 6333 } 6334 if (!valid_qr) { 6335 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6336 for (jj=0;jj<size_of_constraint;jj++) { 6337 for (ii=0;ii<primal_dofs;ii++) { 6338 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6339 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6340 } 6341 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6342 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6343 } 6344 } 6345 } 6346 } else { 6347 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6348 } 6349 } 6350 } else { /* simple transformation block */ 6351 PetscInt row,col; 6352 PetscScalar val,norm; 6353 6354 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6355 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6356 for (j=0;j<size_of_constraint;j++) { 6357 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6358 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6359 if (!PetscBTLookup(is_primal,row_B)) { 6360 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6361 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6362 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6363 } else { 6364 for (k=0;k<size_of_constraint;k++) { 6365 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6366 if (row != col) { 6367 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6368 } else { 6369 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6370 } 6371 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6372 } 6373 } 6374 } 6375 if (pcbddc->dbg_flag) { 6376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6377 } 6378 } 6379 } else { 6380 if (pcbddc->dbg_flag) { 6381 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6382 } 6383 } 6384 } 6385 6386 /* free workspace */ 6387 if (qr_needed) { 6388 if (pcbddc->dbg_flag) { 6389 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6390 } 6391 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6392 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6393 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6394 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6395 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6396 } 6397 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6398 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6399 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6400 6401 /* assembling of global change of variable */ 6402 if (!pcbddc->fake_change) { 6403 Mat tmat; 6404 PetscInt bs; 6405 6406 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6407 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6408 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6409 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6410 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6411 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6412 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6413 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6414 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6415 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6416 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6417 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6418 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6419 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6420 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6421 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6422 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6423 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6424 6425 /* check */ 6426 if (pcbddc->dbg_flag) { 6427 PetscReal error; 6428 Vec x,x_change; 6429 6430 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6431 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6432 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6433 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6434 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6435 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6436 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6437 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6438 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6439 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6440 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6441 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6442 if (error > PETSC_SMALL) { 6443 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6444 } 6445 ierr = VecDestroy(&x);CHKERRQ(ierr); 6446 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6447 } 6448 /* adapt sub_schurs computed (if any) */ 6449 if (pcbddc->use_deluxe_scaling) { 6450 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6451 6452 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6453 if (sub_schurs && sub_schurs->S_Ej_all) { 6454 Mat S_new,tmat; 6455 IS is_all_N,is_V_Sall = NULL; 6456 6457 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6458 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6459 if (pcbddc->deluxe_zerorows) { 6460 ISLocalToGlobalMapping NtoSall; 6461 IS is_V; 6462 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6463 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6464 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6465 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6466 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6467 } 6468 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6469 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6470 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6471 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6472 if (pcbddc->deluxe_zerorows) { 6473 const PetscScalar *array; 6474 const PetscInt *idxs_V,*idxs_all; 6475 PetscInt i,n_V; 6476 6477 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6478 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6479 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6480 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6481 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6482 for (i=0;i<n_V;i++) { 6483 PetscScalar val; 6484 PetscInt idx; 6485 6486 idx = idxs_V[i]; 6487 val = array[idxs_all[idxs_V[i]]]; 6488 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6489 } 6490 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6491 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6492 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6493 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6494 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6495 } 6496 sub_schurs->S_Ej_all = S_new; 6497 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6498 if (sub_schurs->sum_S_Ej_all) { 6499 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6500 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6501 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6502 if (pcbddc->deluxe_zerorows) { 6503 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6504 } 6505 sub_schurs->sum_S_Ej_all = S_new; 6506 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6507 } 6508 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6509 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6510 } 6511 /* destroy any change of basis context in sub_schurs */ 6512 if (sub_schurs && sub_schurs->change) { 6513 PetscInt i; 6514 6515 for (i=0;i<sub_schurs->n_subs;i++) { 6516 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6517 } 6518 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6519 } 6520 } 6521 if (pcbddc->switch_static) { /* need to save the local change */ 6522 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6523 } else { 6524 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6525 } 6526 /* determine if any process has changed the pressures locally */ 6527 pcbddc->change_interior = pcbddc->benign_have_null; 6528 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6529 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6530 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6531 pcbddc->use_qr_single = qr_needed; 6532 } 6533 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6534 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6535 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6536 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6537 } else { 6538 Mat benign_global = NULL; 6539 if (pcbddc->benign_have_null) { 6540 Mat tmat; 6541 6542 pcbddc->change_interior = PETSC_TRUE; 6543 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6544 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6545 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6546 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6547 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6548 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6549 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6550 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6551 if (pcbddc->benign_change) { 6552 Mat M; 6553 6554 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6555 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6556 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6557 ierr = MatDestroy(&M);CHKERRQ(ierr); 6558 } else { 6559 Mat eye; 6560 PetscScalar *array; 6561 6562 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6563 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6564 for (i=0;i<pcis->n;i++) { 6565 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6566 } 6567 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6568 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6569 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6570 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6571 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6572 } 6573 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6574 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6575 } 6576 if (pcbddc->user_ChangeOfBasisMatrix) { 6577 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6578 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6579 } else if (pcbddc->benign_have_null) { 6580 pcbddc->ChangeOfBasisMatrix = benign_global; 6581 } 6582 } 6583 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6584 IS is_global; 6585 const PetscInt *gidxs; 6586 6587 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6588 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6589 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6590 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6591 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6592 } 6593 } 6594 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6595 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6596 } 6597 6598 if (!pcbddc->fake_change) { 6599 /* add pressure dofs to set of primal nodes for numbering purposes */ 6600 for (i=0;i<pcbddc->benign_n;i++) { 6601 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6602 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6603 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6604 pcbddc->local_primal_size_cc++; 6605 pcbddc->local_primal_size++; 6606 } 6607 6608 /* check if a new primal space has been introduced (also take into account benign trick) */ 6609 pcbddc->new_primal_space_local = PETSC_TRUE; 6610 if (olocal_primal_size == pcbddc->local_primal_size) { 6611 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6612 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6613 if (!pcbddc->new_primal_space_local) { 6614 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6615 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6616 } 6617 } 6618 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6619 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6620 } 6621 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6622 6623 /* flush dbg viewer */ 6624 if (pcbddc->dbg_flag) { 6625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6626 } 6627 6628 /* free workspace */ 6629 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6630 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6631 if (!pcbddc->adaptive_selection) { 6632 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6633 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6634 } else { 6635 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6636 pcbddc->adaptive_constraints_idxs_ptr, 6637 pcbddc->adaptive_constraints_data_ptr, 6638 pcbddc->adaptive_constraints_idxs, 6639 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6640 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6641 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6642 } 6643 PetscFunctionReturn(0); 6644 } 6645 6646 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6647 { 6648 ISLocalToGlobalMapping map; 6649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6651 PetscInt i,N; 6652 PetscBool rcsr = PETSC_FALSE; 6653 PetscErrorCode ierr; 6654 6655 PetscFunctionBegin; 6656 if (pcbddc->recompute_topography) { 6657 pcbddc->graphanalyzed = PETSC_FALSE; 6658 /* Reset previously computed graph */ 6659 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6660 /* Init local Graph struct */ 6661 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6662 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6663 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6664 6665 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6666 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6667 } 6668 /* Check validity of the csr graph passed in by the user */ 6669 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6670 6671 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6672 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6673 PetscInt *xadj,*adjncy; 6674 PetscInt nvtxs; 6675 PetscBool flg_row=PETSC_FALSE; 6676 6677 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6678 if (flg_row) { 6679 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6680 pcbddc->computed_rowadj = PETSC_TRUE; 6681 } 6682 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6683 rcsr = PETSC_TRUE; 6684 } 6685 if (pcbddc->dbg_flag) { 6686 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6687 } 6688 6689 /* Setup of Graph */ 6690 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6691 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6692 6693 /* attach info on disconnected subdomains if present */ 6694 if (pcbddc->n_local_subs) { 6695 PetscInt *local_subs; 6696 6697 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6698 for (i=0;i<pcbddc->n_local_subs;i++) { 6699 const PetscInt *idxs; 6700 PetscInt nl,j; 6701 6702 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6703 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6704 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6705 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6706 } 6707 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6708 pcbddc->mat_graph->local_subs = local_subs; 6709 } 6710 } 6711 6712 if (!pcbddc->graphanalyzed) { 6713 /* Graph's connected components analysis */ 6714 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6715 pcbddc->graphanalyzed = PETSC_TRUE; 6716 } 6717 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6718 PetscFunctionReturn(0); 6719 } 6720 6721 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6722 { 6723 PetscInt i,j; 6724 PetscScalar *alphas; 6725 PetscErrorCode ierr; 6726 6727 PetscFunctionBegin; 6728 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6729 for (i=0;i<n;i++) { 6730 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6731 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6732 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6733 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6734 } 6735 ierr = PetscFree(alphas);CHKERRQ(ierr); 6736 PetscFunctionReturn(0); 6737 } 6738 6739 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6740 { 6741 Mat A; 6742 PetscInt n_neighs,*neighs,*n_shared,**shared; 6743 PetscMPIInt size,rank,color; 6744 PetscInt *xadj,*adjncy; 6745 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6746 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6747 PetscInt void_procs,*procs_candidates = NULL; 6748 PetscInt xadj_count,*count; 6749 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6750 PetscSubcomm psubcomm; 6751 MPI_Comm subcomm; 6752 PetscErrorCode ierr; 6753 6754 PetscFunctionBegin; 6755 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6756 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6757 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6758 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6759 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6760 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6761 6762 if (have_void) *have_void = PETSC_FALSE; 6763 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6764 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6765 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6766 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6767 im_active = !!n; 6768 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6769 void_procs = size - active_procs; 6770 /* get ranks of of non-active processes in mat communicator */ 6771 if (void_procs) { 6772 PetscInt ncand; 6773 6774 if (have_void) *have_void = PETSC_TRUE; 6775 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6776 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6777 for (i=0,ncand=0;i<size;i++) { 6778 if (!procs_candidates[i]) { 6779 procs_candidates[ncand++] = i; 6780 } 6781 } 6782 /* force n_subdomains to be not greater that the number of non-active processes */ 6783 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6784 } 6785 6786 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6787 number of subdomains requested 1 -> send to master or first candidate in voids */ 6788 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6789 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6790 PetscInt issize,isidx,dest; 6791 if (*n_subdomains == 1) dest = 0; 6792 else dest = rank; 6793 if (im_active) { 6794 issize = 1; 6795 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6796 isidx = procs_candidates[dest]; 6797 } else { 6798 isidx = dest; 6799 } 6800 } else { 6801 issize = 0; 6802 isidx = -1; 6803 } 6804 if (*n_subdomains != 1) *n_subdomains = active_procs; 6805 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6806 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6807 PetscFunctionReturn(0); 6808 } 6809 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6810 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6811 threshold = PetscMax(threshold,2); 6812 6813 /* Get info on mapping */ 6814 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6815 6816 /* build local CSR graph of subdomains' connectivity */ 6817 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6818 xadj[0] = 0; 6819 xadj[1] = PetscMax(n_neighs-1,0); 6820 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6821 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6822 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6823 for (i=1;i<n_neighs;i++) 6824 for (j=0;j<n_shared[i];j++) 6825 count[shared[i][j]] += 1; 6826 6827 xadj_count = 0; 6828 for (i=1;i<n_neighs;i++) { 6829 for (j=0;j<n_shared[i];j++) { 6830 if (count[shared[i][j]] < threshold) { 6831 adjncy[xadj_count] = neighs[i]; 6832 adjncy_wgt[xadj_count] = n_shared[i]; 6833 xadj_count++; 6834 break; 6835 } 6836 } 6837 } 6838 xadj[1] = xadj_count; 6839 ierr = PetscFree(count);CHKERRQ(ierr); 6840 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6841 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6842 6843 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6844 6845 /* Restrict work on active processes only */ 6846 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6847 if (void_procs) { 6848 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6849 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6850 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6851 subcomm = PetscSubcommChild(psubcomm); 6852 } else { 6853 psubcomm = NULL; 6854 subcomm = PetscObjectComm((PetscObject)mat); 6855 } 6856 6857 v_wgt = NULL; 6858 if (!color) { 6859 ierr = PetscFree(xadj);CHKERRQ(ierr); 6860 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6861 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6862 } else { 6863 Mat subdomain_adj; 6864 IS new_ranks,new_ranks_contig; 6865 MatPartitioning partitioner; 6866 PetscInt rstart=0,rend=0; 6867 PetscInt *is_indices,*oldranks; 6868 PetscMPIInt size; 6869 PetscBool aggregate; 6870 6871 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6872 if (void_procs) { 6873 PetscInt prank = rank; 6874 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6875 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6876 for (i=0;i<xadj[1];i++) { 6877 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6878 } 6879 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6880 } else { 6881 oldranks = NULL; 6882 } 6883 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6884 if (aggregate) { /* TODO: all this part could be made more efficient */ 6885 PetscInt lrows,row,ncols,*cols; 6886 PetscMPIInt nrank; 6887 PetscScalar *vals; 6888 6889 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6890 lrows = 0; 6891 if (nrank<redprocs) { 6892 lrows = size/redprocs; 6893 if (nrank<size%redprocs) lrows++; 6894 } 6895 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6896 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6897 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6898 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6899 row = nrank; 6900 ncols = xadj[1]-xadj[0]; 6901 cols = adjncy; 6902 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6903 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6904 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6905 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6906 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6907 ierr = PetscFree(xadj);CHKERRQ(ierr); 6908 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6909 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6910 ierr = PetscFree(vals);CHKERRQ(ierr); 6911 if (use_vwgt) { 6912 Vec v; 6913 const PetscScalar *array; 6914 PetscInt nl; 6915 6916 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6917 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6918 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6919 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6920 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6921 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6922 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6923 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6924 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6925 ierr = VecDestroy(&v);CHKERRQ(ierr); 6926 } 6927 } else { 6928 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6929 if (use_vwgt) { 6930 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6931 v_wgt[0] = n; 6932 } 6933 } 6934 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6935 6936 /* Partition */ 6937 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6938 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6939 if (v_wgt) { 6940 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6941 } 6942 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6943 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6944 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6945 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6946 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6947 6948 /* renumber new_ranks to avoid "holes" in new set of processors */ 6949 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6950 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6951 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6952 if (!aggregate) { 6953 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6954 #if defined(PETSC_USE_DEBUG) 6955 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6956 #endif 6957 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6958 } else if (oldranks) { 6959 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6960 } else { 6961 ranks_send_to_idx[0] = is_indices[0]; 6962 } 6963 } else { 6964 PetscInt idx = 0; 6965 PetscMPIInt tag; 6966 MPI_Request *reqs; 6967 6968 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6969 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6970 for (i=rstart;i<rend;i++) { 6971 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6972 } 6973 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6974 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6975 ierr = PetscFree(reqs);CHKERRQ(ierr); 6976 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6977 #if defined(PETSC_USE_DEBUG) 6978 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6979 #endif 6980 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6981 } else if (oldranks) { 6982 ranks_send_to_idx[0] = oldranks[idx]; 6983 } else { 6984 ranks_send_to_idx[0] = idx; 6985 } 6986 } 6987 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6988 /* clean up */ 6989 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6990 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6991 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6992 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6993 } 6994 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6995 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6996 6997 /* assemble parallel IS for sends */ 6998 i = 1; 6999 if (!color) i=0; 7000 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7001 PetscFunctionReturn(0); 7002 } 7003 7004 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7005 7006 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7007 { 7008 Mat local_mat; 7009 IS is_sends_internal; 7010 PetscInt rows,cols,new_local_rows; 7011 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7012 PetscBool ismatis,isdense,newisdense,destroy_mat; 7013 ISLocalToGlobalMapping l2gmap; 7014 PetscInt* l2gmap_indices; 7015 const PetscInt* is_indices; 7016 MatType new_local_type; 7017 /* buffers */ 7018 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7019 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7020 PetscInt *recv_buffer_idxs_local; 7021 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7022 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7023 /* MPI */ 7024 MPI_Comm comm,comm_n; 7025 PetscSubcomm subcomm; 7026 PetscMPIInt n_sends,n_recvs,commsize; 7027 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7028 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7029 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7030 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7031 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7032 PetscErrorCode ierr; 7033 7034 PetscFunctionBegin; 7035 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7036 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7037 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7038 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7039 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7040 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7041 PetscValidLogicalCollectiveBool(mat,reuse,6); 7042 PetscValidLogicalCollectiveInt(mat,nis,8); 7043 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7044 if (nvecs) { 7045 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7046 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7047 } 7048 /* further checks */ 7049 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7050 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7051 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7052 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7053 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7054 if (reuse && *mat_n) { 7055 PetscInt mrows,mcols,mnrows,mncols; 7056 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7057 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7058 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7059 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7060 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7061 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7062 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7063 } 7064 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7065 PetscValidLogicalCollectiveInt(mat,bs,0); 7066 7067 /* prepare IS for sending if not provided */ 7068 if (!is_sends) { 7069 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7070 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7071 } else { 7072 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7073 is_sends_internal = is_sends; 7074 } 7075 7076 /* get comm */ 7077 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7078 7079 /* compute number of sends */ 7080 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7081 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7082 7083 /* compute number of receives */ 7084 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7085 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7086 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7087 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7088 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7089 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7090 ierr = PetscFree(iflags);CHKERRQ(ierr); 7091 7092 /* restrict comm if requested */ 7093 subcomm = 0; 7094 destroy_mat = PETSC_FALSE; 7095 if (restrict_comm) { 7096 PetscMPIInt color,subcommsize; 7097 7098 color = 0; 7099 if (restrict_full) { 7100 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7101 } else { 7102 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7103 } 7104 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7105 subcommsize = commsize - subcommsize; 7106 /* check if reuse has been requested */ 7107 if (reuse) { 7108 if (*mat_n) { 7109 PetscMPIInt subcommsize2; 7110 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7111 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7112 comm_n = PetscObjectComm((PetscObject)*mat_n); 7113 } else { 7114 comm_n = PETSC_COMM_SELF; 7115 } 7116 } else { /* MAT_INITIAL_MATRIX */ 7117 PetscMPIInt rank; 7118 7119 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7120 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7121 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7122 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7123 comm_n = PetscSubcommChild(subcomm); 7124 } 7125 /* flag to destroy *mat_n if not significative */ 7126 if (color) destroy_mat = PETSC_TRUE; 7127 } else { 7128 comm_n = comm; 7129 } 7130 7131 /* prepare send/receive buffers */ 7132 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7133 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7134 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7135 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7136 if (nis) { 7137 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7138 } 7139 7140 /* Get data from local matrices */ 7141 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7142 /* TODO: See below some guidelines on how to prepare the local buffers */ 7143 /* 7144 send_buffer_vals should contain the raw values of the local matrix 7145 send_buffer_idxs should contain: 7146 - MatType_PRIVATE type 7147 - PetscInt size_of_l2gmap 7148 - PetscInt global_row_indices[size_of_l2gmap] 7149 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7150 */ 7151 else { 7152 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7153 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7154 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7155 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7156 send_buffer_idxs[1] = i; 7157 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7158 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7159 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7160 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7161 for (i=0;i<n_sends;i++) { 7162 ilengths_vals[is_indices[i]] = len*len; 7163 ilengths_idxs[is_indices[i]] = len+2; 7164 } 7165 } 7166 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7167 /* additional is (if any) */ 7168 if (nis) { 7169 PetscMPIInt psum; 7170 PetscInt j; 7171 for (j=0,psum=0;j<nis;j++) { 7172 PetscInt plen; 7173 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7174 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7175 psum += len+1; /* indices + lenght */ 7176 } 7177 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7178 for (j=0,psum=0;j<nis;j++) { 7179 PetscInt plen; 7180 const PetscInt *is_array_idxs; 7181 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7182 send_buffer_idxs_is[psum] = plen; 7183 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7184 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7185 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7186 psum += plen+1; /* indices + lenght */ 7187 } 7188 for (i=0;i<n_sends;i++) { 7189 ilengths_idxs_is[is_indices[i]] = psum; 7190 } 7191 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7192 } 7193 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7194 7195 buf_size_idxs = 0; 7196 buf_size_vals = 0; 7197 buf_size_idxs_is = 0; 7198 buf_size_vecs = 0; 7199 for (i=0;i<n_recvs;i++) { 7200 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7201 buf_size_vals += (PetscInt)olengths_vals[i]; 7202 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7203 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7204 } 7205 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7206 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7207 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7208 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7209 7210 /* get new tags for clean communications */ 7211 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7212 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7213 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7214 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7215 7216 /* allocate for requests */ 7217 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7218 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7219 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7220 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7221 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7222 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7223 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7224 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7225 7226 /* communications */ 7227 ptr_idxs = recv_buffer_idxs; 7228 ptr_vals = recv_buffer_vals; 7229 ptr_idxs_is = recv_buffer_idxs_is; 7230 ptr_vecs = recv_buffer_vecs; 7231 for (i=0;i<n_recvs;i++) { 7232 source_dest = onodes[i]; 7233 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7234 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7235 ptr_idxs += olengths_idxs[i]; 7236 ptr_vals += olengths_vals[i]; 7237 if (nis) { 7238 source_dest = onodes_is[i]; 7239 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7240 ptr_idxs_is += olengths_idxs_is[i]; 7241 } 7242 if (nvecs) { 7243 source_dest = onodes[i]; 7244 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7245 ptr_vecs += olengths_idxs[i]-2; 7246 } 7247 } 7248 for (i=0;i<n_sends;i++) { 7249 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7250 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7251 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7252 if (nis) { 7253 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7254 } 7255 if (nvecs) { 7256 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7257 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7258 } 7259 } 7260 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7261 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7262 7263 /* assemble new l2g map */ 7264 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7265 ptr_idxs = recv_buffer_idxs; 7266 new_local_rows = 0; 7267 for (i=0;i<n_recvs;i++) { 7268 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7269 ptr_idxs += olengths_idxs[i]; 7270 } 7271 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7272 ptr_idxs = recv_buffer_idxs; 7273 new_local_rows = 0; 7274 for (i=0;i<n_recvs;i++) { 7275 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7276 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7277 ptr_idxs += olengths_idxs[i]; 7278 } 7279 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7280 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7281 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7282 7283 /* infer new local matrix type from received local matrices type */ 7284 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7285 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7286 if (n_recvs) { 7287 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7288 ptr_idxs = recv_buffer_idxs; 7289 for (i=0;i<n_recvs;i++) { 7290 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7291 new_local_type_private = MATAIJ_PRIVATE; 7292 break; 7293 } 7294 ptr_idxs += olengths_idxs[i]; 7295 } 7296 switch (new_local_type_private) { 7297 case MATDENSE_PRIVATE: 7298 new_local_type = MATSEQAIJ; 7299 bs = 1; 7300 break; 7301 case MATAIJ_PRIVATE: 7302 new_local_type = MATSEQAIJ; 7303 bs = 1; 7304 break; 7305 case MATBAIJ_PRIVATE: 7306 new_local_type = MATSEQBAIJ; 7307 break; 7308 case MATSBAIJ_PRIVATE: 7309 new_local_type = MATSEQSBAIJ; 7310 break; 7311 default: 7312 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7313 break; 7314 } 7315 } else { /* by default, new_local_type is seqaij */ 7316 new_local_type = MATSEQAIJ; 7317 bs = 1; 7318 } 7319 7320 /* create MATIS object if needed */ 7321 if (!reuse) { 7322 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7323 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7324 } else { 7325 /* it also destroys the local matrices */ 7326 if (*mat_n) { 7327 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7328 } else { /* this is a fake object */ 7329 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7330 } 7331 } 7332 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7333 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7334 7335 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7336 7337 /* Global to local map of received indices */ 7338 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7339 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7340 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7341 7342 /* restore attributes -> type of incoming data and its size */ 7343 buf_size_idxs = 0; 7344 for (i=0;i<n_recvs;i++) { 7345 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7346 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7347 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7348 } 7349 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7350 7351 /* set preallocation */ 7352 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7353 if (!newisdense) { 7354 PetscInt *new_local_nnz=0; 7355 7356 ptr_idxs = recv_buffer_idxs_local; 7357 if (n_recvs) { 7358 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7359 } 7360 for (i=0;i<n_recvs;i++) { 7361 PetscInt j; 7362 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7363 for (j=0;j<*(ptr_idxs+1);j++) { 7364 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7365 } 7366 } else { 7367 /* TODO */ 7368 } 7369 ptr_idxs += olengths_idxs[i]; 7370 } 7371 if (new_local_nnz) { 7372 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7373 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7374 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7375 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7376 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7377 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7378 } else { 7379 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7380 } 7381 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7382 } else { 7383 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7384 } 7385 7386 /* set values */ 7387 ptr_vals = recv_buffer_vals; 7388 ptr_idxs = recv_buffer_idxs_local; 7389 for (i=0;i<n_recvs;i++) { 7390 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7391 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7392 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7393 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7394 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7395 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7396 } else { 7397 /* TODO */ 7398 } 7399 ptr_idxs += olengths_idxs[i]; 7400 ptr_vals += olengths_vals[i]; 7401 } 7402 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7403 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7404 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7405 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7406 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7407 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7408 7409 #if 0 7410 if (!restrict_comm) { /* check */ 7411 Vec lvec,rvec; 7412 PetscReal infty_error; 7413 7414 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7415 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7416 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7417 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7418 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7419 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7420 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7421 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7422 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7423 } 7424 #endif 7425 7426 /* assemble new additional is (if any) */ 7427 if (nis) { 7428 PetscInt **temp_idxs,*count_is,j,psum; 7429 7430 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7431 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7432 ptr_idxs = recv_buffer_idxs_is; 7433 psum = 0; 7434 for (i=0;i<n_recvs;i++) { 7435 for (j=0;j<nis;j++) { 7436 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7437 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7438 psum += plen; 7439 ptr_idxs += plen+1; /* shift pointer to received data */ 7440 } 7441 } 7442 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7443 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7444 for (i=1;i<nis;i++) { 7445 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7446 } 7447 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7448 ptr_idxs = recv_buffer_idxs_is; 7449 for (i=0;i<n_recvs;i++) { 7450 for (j=0;j<nis;j++) { 7451 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7452 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7453 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7454 ptr_idxs += plen+1; /* shift pointer to received data */ 7455 } 7456 } 7457 for (i=0;i<nis;i++) { 7458 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7459 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7460 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7461 } 7462 ierr = PetscFree(count_is);CHKERRQ(ierr); 7463 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7464 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7465 } 7466 /* free workspace */ 7467 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7468 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7469 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7470 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7471 if (isdense) { 7472 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7473 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7474 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7475 } else { 7476 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7477 } 7478 if (nis) { 7479 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7480 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7481 } 7482 7483 if (nvecs) { 7484 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7485 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7486 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7487 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7488 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7489 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7490 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7491 /* set values */ 7492 ptr_vals = recv_buffer_vecs; 7493 ptr_idxs = recv_buffer_idxs_local; 7494 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7495 for (i=0;i<n_recvs;i++) { 7496 PetscInt j; 7497 for (j=0;j<*(ptr_idxs+1);j++) { 7498 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7499 } 7500 ptr_idxs += olengths_idxs[i]; 7501 ptr_vals += olengths_idxs[i]-2; 7502 } 7503 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7504 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7505 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7506 } 7507 7508 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7509 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7510 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7511 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7512 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7513 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7514 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7515 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7516 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7517 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7518 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7519 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7520 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7521 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7522 ierr = PetscFree(onodes);CHKERRQ(ierr); 7523 if (nis) { 7524 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7525 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7526 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7527 } 7528 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7529 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7530 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7531 for (i=0;i<nis;i++) { 7532 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7533 } 7534 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7535 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7536 } 7537 *mat_n = NULL; 7538 } 7539 PetscFunctionReturn(0); 7540 } 7541 7542 /* temporary hack into ksp private data structure */ 7543 #include <petsc/private/kspimpl.h> 7544 7545 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7546 { 7547 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7548 PC_IS *pcis = (PC_IS*)pc->data; 7549 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7550 Mat coarsedivudotp = NULL; 7551 Mat coarseG,t_coarse_mat_is; 7552 MatNullSpace CoarseNullSpace = NULL; 7553 ISLocalToGlobalMapping coarse_islg; 7554 IS coarse_is,*isarray; 7555 PetscInt i,im_active=-1,active_procs=-1; 7556 PetscInt nis,nisdofs,nisneu,nisvert; 7557 PC pc_temp; 7558 PCType coarse_pc_type; 7559 KSPType coarse_ksp_type; 7560 PetscBool multilevel_requested,multilevel_allowed; 7561 PetscBool coarse_reuse; 7562 PetscInt ncoarse,nedcfield; 7563 PetscBool compute_vecs = PETSC_FALSE; 7564 PetscScalar *array; 7565 MatReuse coarse_mat_reuse; 7566 PetscBool restr, full_restr, have_void; 7567 PetscMPIInt commsize; 7568 PetscErrorCode ierr; 7569 7570 PetscFunctionBegin; 7571 /* Assign global numbering to coarse dofs */ 7572 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7573 PetscInt ocoarse_size; 7574 compute_vecs = PETSC_TRUE; 7575 7576 pcbddc->new_primal_space = PETSC_TRUE; 7577 ocoarse_size = pcbddc->coarse_size; 7578 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7579 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7580 /* see if we can avoid some work */ 7581 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7582 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7583 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7584 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7585 coarse_reuse = PETSC_FALSE; 7586 } else { /* we can safely reuse already computed coarse matrix */ 7587 coarse_reuse = PETSC_TRUE; 7588 } 7589 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7590 coarse_reuse = PETSC_FALSE; 7591 } 7592 /* reset any subassembling information */ 7593 if (!coarse_reuse || pcbddc->recompute_topography) { 7594 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7595 } 7596 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7597 coarse_reuse = PETSC_TRUE; 7598 } 7599 /* assemble coarse matrix */ 7600 if (coarse_reuse && pcbddc->coarse_ksp) { 7601 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7602 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7603 coarse_mat_reuse = MAT_REUSE_MATRIX; 7604 } else { 7605 coarse_mat = NULL; 7606 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7607 } 7608 7609 /* creates temporary l2gmap and IS for coarse indexes */ 7610 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7611 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7612 7613 /* creates temporary MATIS object for coarse matrix */ 7614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7615 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7616 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7617 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7618 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7619 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7620 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7621 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7622 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7623 7624 /* count "active" (i.e. with positive local size) and "void" processes */ 7625 im_active = !!(pcis->n); 7626 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7627 7628 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7629 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7630 /* full_restr : just use the receivers from the subassembling pattern */ 7631 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7632 coarse_mat_is = NULL; 7633 multilevel_allowed = PETSC_FALSE; 7634 multilevel_requested = PETSC_FALSE; 7635 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7636 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7637 if (multilevel_requested) { 7638 ncoarse = active_procs/pcbddc->coarsening_ratio; 7639 restr = PETSC_FALSE; 7640 full_restr = PETSC_FALSE; 7641 } else { 7642 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7643 restr = PETSC_TRUE; 7644 full_restr = PETSC_TRUE; 7645 } 7646 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7647 ncoarse = PetscMax(1,ncoarse); 7648 if (!pcbddc->coarse_subassembling) { 7649 if (pcbddc->coarsening_ratio > 1) { 7650 if (multilevel_requested) { 7651 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7652 } else { 7653 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7654 } 7655 } else { 7656 PetscMPIInt rank; 7657 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7658 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7659 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7660 } 7661 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7662 PetscInt psum; 7663 if (pcbddc->coarse_ksp) psum = 1; 7664 else psum = 0; 7665 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7666 if (ncoarse < commsize) have_void = PETSC_TRUE; 7667 } 7668 /* determine if we can go multilevel */ 7669 if (multilevel_requested) { 7670 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7671 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7672 } 7673 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7674 7675 /* dump subassembling pattern */ 7676 if (pcbddc->dbg_flag && multilevel_allowed) { 7677 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7678 } 7679 7680 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7681 nedcfield = -1; 7682 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7683 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7684 const PetscInt *idxs; 7685 ISLocalToGlobalMapping tmap; 7686 7687 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7688 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7689 /* allocate space for temporary storage */ 7690 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7691 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7692 /* allocate for IS array */ 7693 nisdofs = pcbddc->n_ISForDofsLocal; 7694 if (pcbddc->nedclocal) { 7695 if (pcbddc->nedfield > -1) { 7696 nedcfield = pcbddc->nedfield; 7697 } else { 7698 nedcfield = 0; 7699 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7700 nisdofs = 1; 7701 } 7702 } 7703 nisneu = !!pcbddc->NeumannBoundariesLocal; 7704 nisvert = 0; /* nisvert is not used */ 7705 nis = nisdofs + nisneu + nisvert; 7706 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7707 /* dofs splitting */ 7708 for (i=0;i<nisdofs;i++) { 7709 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7710 if (nedcfield != i) { 7711 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7712 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7713 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7714 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7715 } else { 7716 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7717 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7718 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7719 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7720 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7721 } 7722 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7723 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7724 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7725 } 7726 /* neumann boundaries */ 7727 if (pcbddc->NeumannBoundariesLocal) { 7728 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7729 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7730 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7731 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7732 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7733 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7734 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7735 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7736 } 7737 /* free memory */ 7738 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7739 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7740 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7741 } else { 7742 nis = 0; 7743 nisdofs = 0; 7744 nisneu = 0; 7745 nisvert = 0; 7746 isarray = NULL; 7747 } 7748 /* destroy no longer needed map */ 7749 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7750 7751 /* subassemble */ 7752 if (multilevel_allowed) { 7753 Vec vp[1]; 7754 PetscInt nvecs = 0; 7755 PetscBool reuse,reuser; 7756 7757 if (coarse_mat) reuse = PETSC_TRUE; 7758 else reuse = PETSC_FALSE; 7759 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7760 vp[0] = NULL; 7761 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7762 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7763 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7764 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7765 nvecs = 1; 7766 7767 if (pcbddc->divudotp) { 7768 Mat B,loc_divudotp; 7769 Vec v,p; 7770 IS dummy; 7771 PetscInt np; 7772 7773 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7774 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7775 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7776 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7777 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7778 ierr = VecSet(p,1.);CHKERRQ(ierr); 7779 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7780 ierr = VecDestroy(&p);CHKERRQ(ierr); 7781 ierr = MatDestroy(&B);CHKERRQ(ierr); 7782 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7783 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7784 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7785 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7786 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7787 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7788 ierr = VecDestroy(&v);CHKERRQ(ierr); 7789 } 7790 } 7791 if (reuser) { 7792 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7793 } else { 7794 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7795 } 7796 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7797 PetscScalar *arraym,*arrayv; 7798 PetscInt nl; 7799 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7800 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7801 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7802 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7803 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7804 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7805 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7806 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7807 } else { 7808 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7809 } 7810 } else { 7811 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7812 } 7813 if (coarse_mat_is || coarse_mat) { 7814 PetscMPIInt size; 7815 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7816 if (!multilevel_allowed) { 7817 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7818 } else { 7819 Mat A; 7820 7821 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7822 if (coarse_mat_is) { 7823 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7824 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7825 coarse_mat = coarse_mat_is; 7826 } 7827 /* be sure we don't have MatSeqDENSE as local mat */ 7828 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7829 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7830 } 7831 } 7832 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7833 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7834 7835 /* create local to global scatters for coarse problem */ 7836 if (compute_vecs) { 7837 PetscInt lrows; 7838 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7839 if (coarse_mat) { 7840 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7841 } else { 7842 lrows = 0; 7843 } 7844 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7845 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7846 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7847 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7848 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7849 } 7850 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7851 7852 /* set defaults for coarse KSP and PC */ 7853 if (multilevel_allowed) { 7854 coarse_ksp_type = KSPRICHARDSON; 7855 coarse_pc_type = PCBDDC; 7856 } else { 7857 coarse_ksp_type = KSPPREONLY; 7858 coarse_pc_type = PCREDUNDANT; 7859 } 7860 7861 /* print some info if requested */ 7862 if (pcbddc->dbg_flag) { 7863 if (!multilevel_allowed) { 7864 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7865 if (multilevel_requested) { 7866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7867 } else if (pcbddc->max_levels) { 7868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7869 } 7870 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7871 } 7872 } 7873 7874 /* communicate coarse discrete gradient */ 7875 coarseG = NULL; 7876 if (pcbddc->nedcG && multilevel_allowed) { 7877 MPI_Comm ccomm; 7878 if (coarse_mat) { 7879 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7880 } else { 7881 ccomm = MPI_COMM_NULL; 7882 } 7883 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7884 } 7885 7886 /* create the coarse KSP object only once with defaults */ 7887 if (coarse_mat) { 7888 PetscBool isredundant,isnn,isbddc; 7889 PetscViewer dbg_viewer = NULL; 7890 7891 if (pcbddc->dbg_flag) { 7892 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7893 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7894 } 7895 if (!pcbddc->coarse_ksp) { 7896 char prefix[256],str_level[16]; 7897 size_t len; 7898 7899 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7900 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7901 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7902 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7903 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7904 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7905 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7906 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7907 /* TODO is this logic correct? should check for coarse_mat type */ 7908 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7909 /* prefix */ 7910 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7911 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7912 if (!pcbddc->current_level) { 7913 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7914 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7915 } else { 7916 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7917 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7918 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7919 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7920 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7921 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7922 } 7923 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7924 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7925 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7926 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7927 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7928 /* allow user customization */ 7929 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7930 } 7931 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7932 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7933 if (nisdofs) { 7934 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7935 for (i=0;i<nisdofs;i++) { 7936 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7937 } 7938 } 7939 if (nisneu) { 7940 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7941 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7942 } 7943 if (nisvert) { 7944 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7945 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7946 } 7947 if (coarseG) { 7948 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7949 } 7950 7951 /* get some info after set from options */ 7952 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7953 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7954 if (isbddc && !multilevel_allowed) { 7955 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7956 isbddc = PETSC_FALSE; 7957 } 7958 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7959 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7960 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7961 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7962 isbddc = PETSC_TRUE; 7963 } 7964 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7965 if (isredundant) { 7966 KSP inner_ksp; 7967 PC inner_pc; 7968 7969 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7970 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7971 } 7972 7973 /* parameters which miss an API */ 7974 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7975 if (isbddc) { 7976 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7977 7978 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7979 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7980 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7981 if (pcbddc_coarse->benign_saddle_point) { 7982 Mat coarsedivudotp_is; 7983 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7984 IS row,col; 7985 const PetscInt *gidxs; 7986 PetscInt n,st,M,N; 7987 7988 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7989 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7990 st = st-n; 7991 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7992 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7993 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7994 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7995 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7996 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7997 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7998 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7999 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8000 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8001 ierr = ISDestroy(&row);CHKERRQ(ierr); 8002 ierr = ISDestroy(&col);CHKERRQ(ierr); 8003 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8004 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8005 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8006 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8007 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8008 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8009 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8010 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8011 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8012 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8013 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8014 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8015 } 8016 } 8017 8018 /* propagate symmetry info of coarse matrix */ 8019 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8020 if (pc->pmat->symmetric_set) { 8021 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8022 } 8023 if (pc->pmat->hermitian_set) { 8024 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8025 } 8026 if (pc->pmat->spd_set) { 8027 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8028 } 8029 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8030 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8031 } 8032 /* set operators */ 8033 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8034 if (pcbddc->dbg_flag) { 8035 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8036 } 8037 } 8038 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8039 ierr = PetscFree(isarray);CHKERRQ(ierr); 8040 #if 0 8041 { 8042 PetscViewer viewer; 8043 char filename[256]; 8044 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8045 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8046 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8047 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8048 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8049 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8050 } 8051 #endif 8052 8053 if (pcbddc->coarse_ksp) { 8054 Vec crhs,csol; 8055 8056 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8057 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8058 if (!csol) { 8059 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8060 } 8061 if (!crhs) { 8062 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8063 } 8064 } 8065 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8066 8067 /* compute null space for coarse solver if the benign trick has been requested */ 8068 if (pcbddc->benign_null) { 8069 8070 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8071 for (i=0;i<pcbddc->benign_n;i++) { 8072 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8073 } 8074 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8075 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8076 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8077 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8078 if (coarse_mat) { 8079 Vec nullv; 8080 PetscScalar *array,*array2; 8081 PetscInt nl; 8082 8083 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8084 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8085 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8086 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8087 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8088 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8089 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8090 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8091 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8092 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8093 } 8094 } 8095 8096 if (pcbddc->coarse_ksp) { 8097 PetscBool ispreonly; 8098 8099 if (CoarseNullSpace) { 8100 PetscBool isnull; 8101 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8102 if (isnull) { 8103 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8104 } 8105 /* TODO: add local nullspaces (if any) */ 8106 } 8107 /* setup coarse ksp */ 8108 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8109 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8110 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8111 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8112 KSP check_ksp; 8113 KSPType check_ksp_type; 8114 PC check_pc; 8115 Vec check_vec,coarse_vec; 8116 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8117 PetscInt its; 8118 PetscBool compute_eigs; 8119 PetscReal *eigs_r,*eigs_c; 8120 PetscInt neigs; 8121 const char *prefix; 8122 8123 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8124 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8125 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8126 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8127 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8128 /* prevent from setup unneeded object */ 8129 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8130 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8131 if (ispreonly) { 8132 check_ksp_type = KSPPREONLY; 8133 compute_eigs = PETSC_FALSE; 8134 } else { 8135 check_ksp_type = KSPGMRES; 8136 compute_eigs = PETSC_TRUE; 8137 } 8138 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8139 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8140 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8141 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8142 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8143 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8144 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8145 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8146 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8147 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8148 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8149 /* create random vec */ 8150 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8151 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8152 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8153 /* solve coarse problem */ 8154 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8155 /* set eigenvalue estimation if preonly has not been requested */ 8156 if (compute_eigs) { 8157 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8158 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8159 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8160 if (neigs) { 8161 lambda_max = eigs_r[neigs-1]; 8162 lambda_min = eigs_r[0]; 8163 if (pcbddc->use_coarse_estimates) { 8164 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8165 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8166 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8167 } 8168 } 8169 } 8170 } 8171 8172 /* check coarse problem residual error */ 8173 if (pcbddc->dbg_flag) { 8174 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8175 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8176 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8177 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8178 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8179 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8180 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8181 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8182 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8183 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8184 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8185 if (CoarseNullSpace) { 8186 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8187 } 8188 if (compute_eigs) { 8189 PetscReal lambda_max_s,lambda_min_s; 8190 KSPConvergedReason reason; 8191 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8192 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8193 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8194 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8195 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8196 for (i=0;i<neigs;i++) { 8197 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8198 } 8199 } 8200 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8201 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8202 } 8203 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8204 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8205 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8206 if (compute_eigs) { 8207 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8208 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8209 } 8210 } 8211 } 8212 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8213 /* print additional info */ 8214 if (pcbddc->dbg_flag) { 8215 /* waits until all processes reaches this point */ 8216 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8217 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8218 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8219 } 8220 8221 /* free memory */ 8222 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8223 PetscFunctionReturn(0); 8224 } 8225 8226 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8227 { 8228 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8229 PC_IS* pcis = (PC_IS*)pc->data; 8230 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8231 IS subset,subset_mult,subset_n; 8232 PetscInt local_size,coarse_size=0; 8233 PetscInt *local_primal_indices=NULL; 8234 const PetscInt *t_local_primal_indices; 8235 PetscErrorCode ierr; 8236 8237 PetscFunctionBegin; 8238 /* Compute global number of coarse dofs */ 8239 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8240 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8241 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8242 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8243 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8244 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8245 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8246 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8247 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8248 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8249 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8250 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8251 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8252 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8253 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8254 8255 /* check numbering */ 8256 if (pcbddc->dbg_flag) { 8257 PetscScalar coarsesum,*array,*array2; 8258 PetscInt i; 8259 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8260 8261 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8262 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8263 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8264 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8265 /* counter */ 8266 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8267 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8268 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8269 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8270 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8271 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8272 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8273 for (i=0;i<pcbddc->local_primal_size;i++) { 8274 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8275 } 8276 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8277 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8278 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8279 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8280 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8281 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8282 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8283 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8284 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8285 for (i=0;i<pcis->n;i++) { 8286 if (array[i] != 0.0 && array[i] != array2[i]) { 8287 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8288 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8289 set_error = PETSC_TRUE; 8290 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8291 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8292 } 8293 } 8294 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8295 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8296 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8297 for (i=0;i<pcis->n;i++) { 8298 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8299 } 8300 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8301 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8302 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8303 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8304 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8306 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8307 PetscInt *gidxs; 8308 8309 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8310 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8311 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8312 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8313 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8314 for (i=0;i<pcbddc->local_primal_size;i++) { 8315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8316 } 8317 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8318 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8319 } 8320 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8321 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8322 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8323 } 8324 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8325 /* get back data */ 8326 *coarse_size_n = coarse_size; 8327 *local_primal_indices_n = local_primal_indices; 8328 PetscFunctionReturn(0); 8329 } 8330 8331 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8332 { 8333 IS localis_t; 8334 PetscInt i,lsize,*idxs,n; 8335 PetscScalar *vals; 8336 PetscErrorCode ierr; 8337 8338 PetscFunctionBegin; 8339 /* get indices in local ordering exploiting local to global map */ 8340 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8341 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8342 for (i=0;i<lsize;i++) vals[i] = 1.0; 8343 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8344 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8345 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8346 if (idxs) { /* multilevel guard */ 8347 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8348 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8349 } 8350 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8351 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8352 ierr = PetscFree(vals);CHKERRQ(ierr); 8353 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8354 /* now compute set in local ordering */ 8355 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8356 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8357 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8358 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8359 for (i=0,lsize=0;i<n;i++) { 8360 if (PetscRealPart(vals[i]) > 0.5) { 8361 lsize++; 8362 } 8363 } 8364 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8365 for (i=0,lsize=0;i<n;i++) { 8366 if (PetscRealPart(vals[i]) > 0.5) { 8367 idxs[lsize++] = i; 8368 } 8369 } 8370 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8371 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8372 *localis = localis_t; 8373 PetscFunctionReturn(0); 8374 } 8375 8376 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8377 { 8378 PC_IS *pcis=(PC_IS*)pc->data; 8379 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8380 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8381 Mat S_j; 8382 PetscInt *used_xadj,*used_adjncy; 8383 PetscBool free_used_adj; 8384 PetscErrorCode ierr; 8385 8386 PetscFunctionBegin; 8387 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8388 free_used_adj = PETSC_FALSE; 8389 if (pcbddc->sub_schurs_layers == -1) { 8390 used_xadj = NULL; 8391 used_adjncy = NULL; 8392 } else { 8393 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8394 used_xadj = pcbddc->mat_graph->xadj; 8395 used_adjncy = pcbddc->mat_graph->adjncy; 8396 } else if (pcbddc->computed_rowadj) { 8397 used_xadj = pcbddc->mat_graph->xadj; 8398 used_adjncy = pcbddc->mat_graph->adjncy; 8399 } else { 8400 PetscBool flg_row=PETSC_FALSE; 8401 const PetscInt *xadj,*adjncy; 8402 PetscInt nvtxs; 8403 8404 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8405 if (flg_row) { 8406 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8407 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8408 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8409 free_used_adj = PETSC_TRUE; 8410 } else { 8411 pcbddc->sub_schurs_layers = -1; 8412 used_xadj = NULL; 8413 used_adjncy = NULL; 8414 } 8415 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8416 } 8417 } 8418 8419 /* setup sub_schurs data */ 8420 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8421 if (!sub_schurs->schur_explicit) { 8422 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8423 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8424 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8425 } else { 8426 Mat change = NULL; 8427 Vec scaling = NULL; 8428 IS change_primal = NULL, iP; 8429 PetscInt benign_n; 8430 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8431 PetscBool isseqaij,need_change = PETSC_FALSE; 8432 PetscBool discrete_harmonic = PETSC_FALSE; 8433 8434 if (!pcbddc->use_vertices && reuse_solvers) { 8435 PetscInt n_vertices; 8436 8437 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8438 reuse_solvers = (PetscBool)!n_vertices; 8439 } 8440 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8441 if (!isseqaij) { 8442 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8443 if (matis->A == pcbddc->local_mat) { 8444 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8445 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8446 } else { 8447 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8448 } 8449 } 8450 if (!pcbddc->benign_change_explicit) { 8451 benign_n = pcbddc->benign_n; 8452 } else { 8453 benign_n = 0; 8454 } 8455 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8456 We need a global reduction to avoid possible deadlocks. 8457 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8458 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8459 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8460 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8461 need_change = (PetscBool)(!need_change); 8462 } 8463 /* If the user defines additional constraints, we import them here. 8464 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8465 if (need_change) { 8466 PC_IS *pcisf; 8467 PC_BDDC *pcbddcf; 8468 PC pcf; 8469 8470 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8471 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8472 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8473 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8474 8475 /* hacks */ 8476 pcisf = (PC_IS*)pcf->data; 8477 pcisf->is_B_local = pcis->is_B_local; 8478 pcisf->vec1_N = pcis->vec1_N; 8479 pcisf->BtoNmap = pcis->BtoNmap; 8480 pcisf->n = pcis->n; 8481 pcisf->n_B = pcis->n_B; 8482 pcbddcf = (PC_BDDC*)pcf->data; 8483 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8484 pcbddcf->mat_graph = pcbddc->mat_graph; 8485 pcbddcf->use_faces = PETSC_TRUE; 8486 pcbddcf->use_change_of_basis = PETSC_TRUE; 8487 pcbddcf->use_change_on_faces = PETSC_TRUE; 8488 pcbddcf->use_qr_single = PETSC_TRUE; 8489 pcbddcf->fake_change = PETSC_TRUE; 8490 8491 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8492 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8493 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8494 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8495 change = pcbddcf->ConstraintMatrix; 8496 pcbddcf->ConstraintMatrix = NULL; 8497 8498 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8499 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8500 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8501 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8502 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8503 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8504 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8505 pcf->ops->destroy = NULL; 8506 pcf->ops->reset = NULL; 8507 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8508 } 8509 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8510 8511 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8512 if (iP) { 8513 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8514 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8515 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8516 } 8517 if (discrete_harmonic) { 8518 Mat A; 8519 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8520 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8521 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8522 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8523 ierr = MatDestroy(&A);CHKERRQ(ierr); 8524 } else { 8525 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8526 } 8527 ierr = MatDestroy(&change);CHKERRQ(ierr); 8528 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8529 } 8530 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8531 8532 /* free adjacency */ 8533 if (free_used_adj) { 8534 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8535 } 8536 PetscFunctionReturn(0); 8537 } 8538 8539 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8540 { 8541 PC_IS *pcis=(PC_IS*)pc->data; 8542 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8543 PCBDDCGraph graph; 8544 PetscErrorCode ierr; 8545 8546 PetscFunctionBegin; 8547 /* attach interface graph for determining subsets */ 8548 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8549 IS verticesIS,verticescomm; 8550 PetscInt vsize,*idxs; 8551 8552 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8553 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8554 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8555 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8556 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8557 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8558 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8559 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8560 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8561 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8562 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8563 } else { 8564 graph = pcbddc->mat_graph; 8565 } 8566 /* print some info */ 8567 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8568 IS vertices; 8569 PetscInt nv,nedges,nfaces; 8570 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8571 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8572 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8573 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8578 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8579 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8580 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8581 } 8582 8583 /* sub_schurs init */ 8584 if (!pcbddc->sub_schurs) { 8585 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8586 } 8587 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8588 8589 /* free graph struct */ 8590 if (pcbddc->sub_schurs_rebuild) { 8591 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8592 } 8593 PetscFunctionReturn(0); 8594 } 8595 8596 PetscErrorCode PCBDDCCheckOperator(PC pc) 8597 { 8598 PC_IS *pcis=(PC_IS*)pc->data; 8599 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8600 PetscErrorCode ierr; 8601 8602 PetscFunctionBegin; 8603 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8604 IS zerodiag = NULL; 8605 Mat S_j,B0_B=NULL; 8606 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8607 PetscScalar *p0_check,*array,*array2; 8608 PetscReal norm; 8609 PetscInt i; 8610 8611 /* B0 and B0_B */ 8612 if (zerodiag) { 8613 IS dummy; 8614 8615 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8616 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8617 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8618 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8619 } 8620 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8621 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8622 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8623 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8624 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8625 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8626 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8627 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8628 /* S_j */ 8629 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8630 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8631 8632 /* mimic vector in \widetilde{W}_\Gamma */ 8633 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8634 /* continuous in primal space */ 8635 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8636 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8637 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8638 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8639 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8640 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8641 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8642 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8643 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8644 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8645 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8646 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8647 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8648 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8649 8650 /* assemble rhs for coarse problem */ 8651 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8652 /* local with Schur */ 8653 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8654 if (zerodiag) { 8655 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8656 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8657 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8658 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8659 } 8660 /* sum on primal nodes the local contributions */ 8661 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8662 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8663 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8664 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8665 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8666 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8667 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8668 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8669 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8670 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8671 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8672 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8673 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8674 /* scale primal nodes (BDDC sums contibutions) */ 8675 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8676 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8677 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8678 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8679 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8680 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8681 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8682 /* global: \widetilde{B0}_B w_\Gamma */ 8683 if (zerodiag) { 8684 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8685 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8686 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8687 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8688 } 8689 /* BDDC */ 8690 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8691 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8692 8693 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8694 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8695 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8696 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8697 for (i=0;i<pcbddc->benign_n;i++) { 8698 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8699 } 8700 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8701 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8702 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8703 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8704 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8705 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8706 } 8707 PetscFunctionReturn(0); 8708 } 8709 8710 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8711 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8712 { 8713 Mat At; 8714 IS rows; 8715 PetscInt rst,ren; 8716 PetscErrorCode ierr; 8717 PetscLayout rmap; 8718 8719 PetscFunctionBegin; 8720 rst = ren = 0; 8721 if (ccomm != MPI_COMM_NULL) { 8722 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8723 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8724 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8725 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8726 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8727 } 8728 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8729 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8730 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8731 8732 if (ccomm != MPI_COMM_NULL) { 8733 Mat_MPIAIJ *a,*b; 8734 IS from,to; 8735 Vec gvec; 8736 PetscInt lsize; 8737 8738 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8739 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8740 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8741 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8742 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8743 a = (Mat_MPIAIJ*)At->data; 8744 b = (Mat_MPIAIJ*)(*B)->data; 8745 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8746 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8747 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8748 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8749 b->A = a->A; 8750 b->B = a->B; 8751 8752 b->donotstash = a->donotstash; 8753 b->roworiented = a->roworiented; 8754 b->rowindices = 0; 8755 b->rowvalues = 0; 8756 b->getrowactive = PETSC_FALSE; 8757 8758 (*B)->rmap = rmap; 8759 (*B)->factortype = A->factortype; 8760 (*B)->assembled = PETSC_TRUE; 8761 (*B)->insertmode = NOT_SET_VALUES; 8762 (*B)->preallocated = PETSC_TRUE; 8763 8764 if (a->colmap) { 8765 #if defined(PETSC_USE_CTABLE) 8766 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8767 #else 8768 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8769 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8770 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8771 #endif 8772 } else b->colmap = 0; 8773 if (a->garray) { 8774 PetscInt len; 8775 len = a->B->cmap->n; 8776 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8777 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8778 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8779 } else b->garray = 0; 8780 8781 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8782 b->lvec = a->lvec; 8783 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8784 8785 /* cannot use VecScatterCopy */ 8786 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8787 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8788 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8789 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8790 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8791 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8792 ierr = ISDestroy(&from);CHKERRQ(ierr); 8793 ierr = ISDestroy(&to);CHKERRQ(ierr); 8794 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8795 } 8796 ierr = MatDestroy(&At);CHKERRQ(ierr); 8797 PetscFunctionReturn(0); 8798 } 8799