1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 IS *faces,*edges; 1518 PetscScalar *vals; 1519 const PetscScalar *array; 1520 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1526 if (graph->twodim) { 1527 lmaxneighs = 2; 1528 } else { 1529 lmaxneighs = 1; 1530 for (i=0;i<ne;i++) { 1531 const PetscInt *idxs; 1532 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1534 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1535 } 1536 lmaxneighs++; /* graph count does not include self */ 1537 } 1538 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1539 maxsize = 0; 1540 for (i=0;i<ne;i++) { 1541 PetscInt nn; 1542 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1543 maxsize = PetscMax(maxsize,nn); 1544 } 1545 for (i=0;i<nf;i++) { 1546 PetscInt nn; 1547 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1548 maxsize = PetscMax(maxsize,nn); 1549 } 1550 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1551 /* create vectors to hold quadrature weights */ 1552 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1557 } 1558 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1559 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1560 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1561 for (i=0;i<maxneighs;i++) { 1562 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1563 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1564 } 1565 1566 /* compute local quad vec */ 1567 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1568 if (!transpose) { 1569 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1572 } 1573 ierr = VecSet(p,1.);CHKERRQ(ierr); 1574 if (!transpose) { 1575 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } else { 1577 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1578 } 1579 if (vl2l) { 1580 Mat lA; 1581 VecScatter sc; 1582 1583 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1584 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1585 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1589 } else { 1590 vins = v; 1591 } 1592 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1593 ierr = VecDestroy(&p);CHKERRQ(ierr); 1594 1595 /* insert in global quadrature vecs */ 1596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1597 for (i=0;i<nf;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1608 } 1609 for (i=0;i<ne;i++) { 1610 const PetscInt *idxs; 1611 PetscInt idx,nn,j; 1612 1613 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1614 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1615 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1616 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1617 idx = -(idx+1); 1618 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1620 } 1621 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1622 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1623 if (vl2l) { 1624 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroy(&v);CHKERRQ(ierr); 1627 ierr = PetscFree(vals);CHKERRQ(ierr); 1628 1629 /* assemble near null space */ 1630 for (i=0;i<maxneighs;i++) { 1631 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1632 } 1633 for (i=0;i<maxneighs;i++) { 1634 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1635 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1636 } 1637 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1642 { 1643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1644 PetscErrorCode ierr; 1645 1646 PetscFunctionBegin; 1647 if (primalv) { 1648 if (pcbddc->user_primal_vertices_local) { 1649 IS list[2], newp; 1650 1651 list[0] = primalv; 1652 list[1] = pcbddc->user_primal_vertices_local; 1653 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1654 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1655 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1656 pcbddc->user_primal_vertices_local = newp; 1657 } else { 1658 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1659 } 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1665 { 1666 PetscErrorCode ierr; 1667 Vec local,global; 1668 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1669 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1670 PetscBool monolithic = PETSC_FALSE; 1671 1672 PetscFunctionBegin; 1673 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1674 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1676 /* need to convert from global to local topology information and remove references to information in global ordering */ 1677 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1678 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1679 if (monolithic) goto boundary; 1680 1681 if (pcbddc->user_provided_isfordofs) { 1682 if (pcbddc->n_ISForDofs) { 1683 PetscInt i; 1684 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1685 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1687 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1688 } 1689 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1690 pcbddc->n_ISForDofs = 0; 1691 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1692 } 1693 } else { 1694 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1695 DM dm; 1696 1697 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1698 if (!dm) { 1699 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1700 } 1701 if (dm) { 1702 IS *fields; 1703 PetscInt nf,i; 1704 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1705 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1706 for (i=0;i<nf;i++) { 1707 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1708 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1709 } 1710 ierr = PetscFree(fields);CHKERRQ(ierr); 1711 pcbddc->n_ISForDofsLocal = nf; 1712 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1713 PetscContainer c; 1714 1715 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1716 if (c) { 1717 MatISLocalFields lf; 1718 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1719 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1720 } else { /* fallback, create the default fields if bs > 1 */ 1721 PetscInt i, n = matis->A->rmap->n; 1722 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1723 if (i > 1) { 1724 pcbddc->n_ISForDofsLocal = i; 1725 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1726 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1727 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1728 } 1729 } 1730 } 1731 } 1732 } else { 1733 PetscInt i; 1734 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1736 } 1737 } 1738 } 1739 1740 boundary: 1741 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1742 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1743 } else if (pcbddc->DirichletBoundariesLocal) { 1744 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1745 } 1746 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1747 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1748 } else if (pcbddc->NeumannBoundariesLocal) { 1749 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1750 } 1751 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1752 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1753 } 1754 ierr = VecDestroy(&global);CHKERRQ(ierr); 1755 ierr = VecDestroy(&local);CHKERRQ(ierr); 1756 /* detect local disconnected subdomains if requested (use matis->A) */ 1757 if (pcbddc->detect_disconnected) { 1758 IS primalv = NULL; 1759 PetscInt i; 1760 1761 for (i=0;i<pcbddc->n_local_subs;i++) { 1762 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1763 } 1764 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1765 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1766 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1767 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1768 } 1769 /* early stage corner detection */ 1770 { 1771 DM dm; 1772 1773 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1774 if (dm) { 1775 PetscBool isda; 1776 1777 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1778 if (isda) { 1779 ISLocalToGlobalMapping l2l; 1780 IS corners; 1781 Mat lA; 1782 1783 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1785 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1786 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1787 if (l2l) { 1788 const PetscInt *idx; 1789 PetscInt bs,*idxout,n; 1790 1791 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1792 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1793 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1795 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1796 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1797 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1798 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1799 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1800 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1801 } else { /* not from DMDA */ 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 } 1804 } 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1811 { 1812 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1813 PetscErrorCode ierr; 1814 IS nis; 1815 const PetscInt *idxs; 1816 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1817 PetscBool *ld; 1818 1819 PetscFunctionBegin; 1820 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1821 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1822 if (mop == MPI_LAND) { 1823 /* init rootdata with true */ 1824 ld = (PetscBool*) matis->sf_rootdata; 1825 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1826 } else { 1827 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1828 } 1829 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1830 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1831 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1832 ld = (PetscBool*) matis->sf_leafdata; 1833 for (i=0;i<nd;i++) 1834 if (-1 < idxs[i] && idxs[i] < n) 1835 ld[idxs[i]] = PETSC_TRUE; 1836 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1837 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1838 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1839 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1840 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1841 if (mop == MPI_LAND) { 1842 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1843 } else { 1844 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1845 } 1846 for (i=0,nnd=0;i<n;i++) 1847 if (ld[i]) 1848 nidxs[nnd++] = i; 1849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1850 ierr = ISDestroy(is);CHKERRQ(ierr); 1851 *is = nis; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1856 { 1857 PC_IS *pcis = (PC_IS*)(pc->data); 1858 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 if (!pcbddc->benign_have_null) { 1863 PetscFunctionReturn(0); 1864 } 1865 if (pcbddc->ChangeOfBasisMatrix) { 1866 Vec swap; 1867 1868 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1869 swap = pcbddc->work_change; 1870 pcbddc->work_change = r; 1871 r = swap; 1872 } 1873 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1875 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1876 ierr = VecSet(z,0.);CHKERRQ(ierr); 1877 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1878 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 if (pcbddc->ChangeOfBasisMatrix) { 1880 pcbddc->work_change = r; 1881 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1882 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1888 { 1889 PCBDDCBenignMatMult_ctx ctx; 1890 PetscErrorCode ierr; 1891 PetscBool apply_right,apply_left,reset_x; 1892 1893 PetscFunctionBegin; 1894 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1895 if (transpose) { 1896 apply_right = ctx->apply_left; 1897 apply_left = ctx->apply_right; 1898 } else { 1899 apply_right = ctx->apply_right; 1900 apply_left = ctx->apply_left; 1901 } 1902 reset_x = PETSC_FALSE; 1903 if (apply_right) { 1904 const PetscScalar *ax; 1905 PetscInt nl,i; 1906 1907 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1908 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1910 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1911 for (i=0;i<ctx->benign_n;i++) { 1912 PetscScalar sum,val; 1913 const PetscInt *idxs; 1914 PetscInt nz,j; 1915 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1916 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1917 sum = 0.; 1918 if (ctx->apply_p0) { 1919 val = ctx->work[idxs[nz-1]]; 1920 for (j=0;j<nz-1;j++) { 1921 sum += ctx->work[idxs[j]]; 1922 ctx->work[idxs[j]] += val; 1923 } 1924 } else { 1925 for (j=0;j<nz-1;j++) { 1926 sum += ctx->work[idxs[j]]; 1927 } 1928 } 1929 ctx->work[idxs[nz-1]] -= sum; 1930 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1931 } 1932 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1933 reset_x = PETSC_TRUE; 1934 } 1935 if (transpose) { 1936 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1937 } else { 1938 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1939 } 1940 if (reset_x) { 1941 ierr = VecResetArray(x);CHKERRQ(ierr); 1942 } 1943 if (apply_left) { 1944 PetscScalar *ay; 1945 PetscInt i; 1946 1947 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1948 for (i=0;i<ctx->benign_n;i++) { 1949 PetscScalar sum,val; 1950 const PetscInt *idxs; 1951 PetscInt nz,j; 1952 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1953 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1954 val = -ay[idxs[nz-1]]; 1955 if (ctx->apply_p0) { 1956 sum = 0.; 1957 for (j=0;j<nz-1;j++) { 1958 sum += ay[idxs[j]]; 1959 ay[idxs[j]] += val; 1960 } 1961 ay[idxs[nz-1]] += sum; 1962 } else { 1963 for (j=0;j<nz-1;j++) { 1964 ay[idxs[j]] += val; 1965 } 1966 ay[idxs[nz-1]] = 0.; 1967 } 1968 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1969 } 1970 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1971 } 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1976 { 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1985 { 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1994 { 1995 PC_IS *pcis = (PC_IS*)pc->data; 1996 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1997 PCBDDCBenignMatMult_ctx ctx; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 if (!restore) { 2002 Mat A_IB,A_BI; 2003 PetscScalar *work; 2004 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2005 2006 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2007 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2008 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2009 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2010 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2011 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2012 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2013 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2014 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2015 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2016 ctx->apply_left = PETSC_TRUE; 2017 ctx->apply_right = PETSC_FALSE; 2018 ctx->apply_p0 = PETSC_FALSE; 2019 ctx->benign_n = pcbddc->benign_n; 2020 if (reuse) { 2021 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2022 ctx->free = PETSC_FALSE; 2023 } else { /* TODO: could be optimized for successive solves */ 2024 ISLocalToGlobalMapping N_to_D; 2025 PetscInt i; 2026 2027 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2028 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2029 for (i=0;i<pcbddc->benign_n;i++) { 2030 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2031 } 2032 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2033 ctx->free = PETSC_TRUE; 2034 } 2035 ctx->A = pcis->A_IB; 2036 ctx->work = work; 2037 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2038 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2040 pcis->A_IB = A_IB; 2041 2042 /* A_BI as A_IB^T */ 2043 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2044 pcbddc->benign_original_mat = pcis->A_BI; 2045 pcis->A_BI = A_BI; 2046 } else { 2047 if (!pcbddc->benign_original_mat) { 2048 PetscFunctionReturn(0); 2049 } 2050 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2052 pcis->A_IB = ctx->A; 2053 ctx->A = NULL; 2054 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2055 pcis->A_BI = pcbddc->benign_original_mat; 2056 pcbddc->benign_original_mat = NULL; 2057 if (ctx->free) { 2058 PetscInt i; 2059 for (i=0;i<ctx->benign_n;i++) { 2060 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2061 } 2062 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2063 } 2064 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2065 ierr = PetscFree(ctx);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 /* used just in bddc debug mode */ 2071 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2072 { 2073 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2074 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2075 Mat An; 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2080 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2081 if (is1) { 2082 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2083 ierr = MatDestroy(&An);CHKERRQ(ierr); 2084 } else { 2085 *B = An; 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 /* TODO: add reuse flag */ 2091 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2092 { 2093 Mat Bt; 2094 PetscScalar *a,*bdata; 2095 const PetscInt *ii,*ij; 2096 PetscInt m,n,i,nnz,*bii,*bij; 2097 PetscBool flg_row; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2102 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2103 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2104 nnz = n; 2105 for (i=0;i<ii[n];i++) { 2106 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2107 } 2108 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2109 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2110 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2111 nnz = 0; 2112 bii[0] = 0; 2113 for (i=0;i<n;i++) { 2114 PetscInt j; 2115 for (j=ii[i];j<ii[i+1];j++) { 2116 PetscScalar entry = a[j]; 2117 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2118 bij[nnz] = ij[j]; 2119 bdata[nnz] = entry; 2120 nnz++; 2121 } 2122 } 2123 bii[i+1] = nnz; 2124 } 2125 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2126 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2127 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2128 { 2129 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2130 b->free_a = PETSC_TRUE; 2131 b->free_ij = PETSC_TRUE; 2132 } 2133 *B = Bt; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2138 { 2139 Mat B = NULL; 2140 DM dm; 2141 IS is_dummy,*cc_n; 2142 ISLocalToGlobalMapping l2gmap_dummy; 2143 PCBDDCGraph graph; 2144 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2145 PetscInt i,n; 2146 PetscInt *xadj,*adjncy; 2147 PetscBool isplex = PETSC_FALSE; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 if (ncc) *ncc = 0; 2152 if (cc) *cc = NULL; 2153 if (primalv) *primalv = NULL; 2154 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2155 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2156 if (!dm) { 2157 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2158 } 2159 if (dm) { 2160 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2161 } 2162 if (isplex) { /* this code has been modified from plexpartition.c */ 2163 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2164 PetscInt *adj = NULL; 2165 IS cellNumbering; 2166 const PetscInt *cellNum; 2167 PetscBool useCone, useClosure; 2168 PetscSection section; 2169 PetscSegBuffer adjBuffer; 2170 PetscSF sfPoint; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2175 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2176 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2177 /* Build adjacency graph via a section/segbuffer */ 2178 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2179 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2180 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2181 /* Always use FVM adjacency to create partitioner graph */ 2182 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2183 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2184 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2185 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2186 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2187 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2188 for (n = 0, p = pStart; p < pEnd; p++) { 2189 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2190 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2191 adjSize = PETSC_DETERMINE; 2192 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2193 for (a = 0; a < adjSize; ++a) { 2194 const PetscInt point = adj[a]; 2195 if (pStart <= point && point < pEnd) { 2196 PetscInt *PETSC_RESTRICT pBuf; 2197 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2198 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2199 *pBuf = point; 2200 } 2201 } 2202 n++; 2203 } 2204 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2205 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2206 /* Derive CSR graph from section/segbuffer */ 2207 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2208 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2209 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2210 for (idx = 0, p = pStart; p < pEnd; p++) { 2211 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2212 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2213 } 2214 xadj[n] = size; 2215 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2216 /* Clean up */ 2217 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2218 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2219 ierr = PetscFree(adj);CHKERRQ(ierr); 2220 graph->xadj = xadj; 2221 graph->adjncy = adjncy; 2222 } else { 2223 Mat A; 2224 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2225 2226 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2227 if (!A->rmap->N || !A->cmap->N) { 2228 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2232 if (!isseqaij && filter) { 2233 PetscBool isseqdense; 2234 2235 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2236 if (!isseqdense) { 2237 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2238 } else { /* TODO: rectangular case and LDA */ 2239 PetscScalar *array; 2240 PetscReal chop=1.e-6; 2241 2242 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2243 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2244 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2245 for (i=0;i<n;i++) { 2246 PetscInt j; 2247 for (j=i+1;j<n;j++) { 2248 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2249 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2250 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2251 } 2252 } 2253 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2254 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2255 } 2256 } else { 2257 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2258 B = A; 2259 } 2260 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2261 2262 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2263 if (filter) { 2264 PetscScalar *data; 2265 PetscInt j,cum; 2266 2267 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2268 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2269 cum = 0; 2270 for (i=0;i<n;i++) { 2271 PetscInt t; 2272 2273 for (j=xadj[i];j<xadj[i+1];j++) { 2274 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2275 continue; 2276 } 2277 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2278 } 2279 t = xadj_filtered[i]; 2280 xadj_filtered[i] = cum; 2281 cum += t; 2282 } 2283 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2284 graph->xadj = xadj_filtered; 2285 graph->adjncy = adjncy_filtered; 2286 } else { 2287 graph->xadj = xadj; 2288 graph->adjncy = adjncy; 2289 } 2290 } 2291 /* compute local connected components using PCBDDCGraph */ 2292 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2293 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2294 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2295 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2296 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2297 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2298 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2299 2300 /* partial clean up */ 2301 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2302 if (B) { 2303 PetscBool flg_row; 2304 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2305 ierr = MatDestroy(&B);CHKERRQ(ierr); 2306 } 2307 if (isplex) { 2308 ierr = PetscFree(xadj);CHKERRQ(ierr); 2309 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2310 } 2311 2312 /* get back data */ 2313 if (isplex) { 2314 if (ncc) *ncc = graph->ncc; 2315 if (cc || primalv) { 2316 Mat A; 2317 PetscBT btv,btvt; 2318 PetscSection subSection; 2319 PetscInt *ids,cum,cump,*cids,*pids; 2320 2321 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2322 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2323 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2324 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2325 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2326 2327 cids[0] = 0; 2328 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2329 PetscInt j; 2330 2331 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2332 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2333 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2334 2335 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2336 for (k = 0; k < 2*size; k += 2) { 2337 PetscInt s, p = closure[k], off, dof, cdof; 2338 2339 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2340 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2341 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2342 for (s = 0; s < dof-cdof; s++) { 2343 if (PetscBTLookupSet(btvt,off+s)) continue; 2344 if (!PetscBTLookup(btv,off+s)) { 2345 ids[cum++] = off+s; 2346 } else { /* cross-vertex */ 2347 pids[cump++] = off+s; 2348 } 2349 } 2350 } 2351 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2352 } 2353 cids[i+1] = cum; 2354 /* mark dofs as already assigned */ 2355 for (j = cids[i]; j < cids[i+1]; j++) { 2356 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2357 } 2358 } 2359 if (cc) { 2360 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2361 for (i = 0; i < graph->ncc; i++) { 2362 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2363 } 2364 *cc = cc_n; 2365 } 2366 if (primalv) { 2367 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2368 } 2369 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2370 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2371 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2372 } 2373 } else { 2374 if (ncc) *ncc = graph->ncc; 2375 if (cc) { 2376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2377 for (i=0;i<graph->ncc;i++) { 2378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2379 } 2380 *cc = cc_n; 2381 } 2382 } 2383 /* clean up graph */ 2384 graph->xadj = 0; 2385 graph->adjncy = 0; 2386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2391 { 2392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2393 PC_IS* pcis = (PC_IS*)(pc->data); 2394 IS dirIS = NULL; 2395 PetscInt i; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2400 if (zerodiag) { 2401 Mat A; 2402 Vec vec3_N; 2403 PetscScalar *vals; 2404 const PetscInt *idxs; 2405 PetscInt nz,*count; 2406 2407 /* p0 */ 2408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2412 for (i=0;i<nz;i++) vals[i] = 1.; 2413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2416 /* v_I */ 2417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2418 for (i=0;i<nz;i++) vals[i] = 0.; 2419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2425 if (dirIS) { 2426 PetscInt n; 2427 2428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2430 for (i=0;i<n;i++) vals[i] = 0.; 2431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2433 } 2434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2438 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2441 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2442 ierr = PetscFree(vals);CHKERRQ(ierr); 2443 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2444 2445 /* there should not be any pressure dofs lying on the interface */ 2446 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2447 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2449 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2450 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2451 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2452 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2453 ierr = PetscFree(count);CHKERRQ(ierr); 2454 } 2455 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2456 2457 /* check PCBDDCBenignGetOrSetP0 */ 2458 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2460 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2462 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2463 for (i=0;i<pcbddc->benign_n;i++) { 2464 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2465 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2471 { 2472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2473 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2474 PetscInt nz,n; 2475 PetscInt *interior_dofs,n_interior_dofs,nneu; 2476 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2481 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2482 for (n=0;n<pcbddc->benign_n;n++) { 2483 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2484 } 2485 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2486 pcbddc->benign_n = 0; 2487 2488 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2489 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2490 Checks if all the pressure dofs in each subdomain have a zero diagonal 2491 If not, a change of basis on pressures is not needed 2492 since the local Schur complements are already SPD 2493 */ 2494 has_null_pressures = PETSC_TRUE; 2495 have_null = PETSC_TRUE; 2496 if (pcbddc->n_ISForDofsLocal) { 2497 IS iP = NULL; 2498 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2499 2500 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2501 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2502 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2503 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2504 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2505 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2506 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2507 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2508 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2509 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2510 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2511 if (iP) { 2512 IS newpressures; 2513 2514 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2515 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2516 pressures = newpressures; 2517 } 2518 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2519 if (!sorted) { 2520 ierr = ISSort(pressures);CHKERRQ(ierr); 2521 } 2522 } else { 2523 pressures = NULL; 2524 } 2525 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2526 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2527 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2528 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2529 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2530 if (!sorted) { 2531 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2532 } 2533 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2534 zerodiag_save = zerodiag; 2535 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2536 if (!nz) { 2537 if (n) have_null = PETSC_FALSE; 2538 has_null_pressures = PETSC_FALSE; 2539 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2540 } 2541 recompute_zerodiag = PETSC_FALSE; 2542 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2543 zerodiag_subs = NULL; 2544 pcbddc->benign_n = 0; 2545 n_interior_dofs = 0; 2546 interior_dofs = NULL; 2547 nneu = 0; 2548 if (pcbddc->NeumannBoundariesLocal) { 2549 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2550 } 2551 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2552 if (checkb) { /* need to compute interior nodes */ 2553 PetscInt n,i,j; 2554 PetscInt n_neigh,*neigh,*n_shared,**shared; 2555 PetscInt *iwork; 2556 2557 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2558 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2559 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2561 for (i=1;i<n_neigh;i++) 2562 for (j=0;j<n_shared[i];j++) 2563 iwork[shared[i][j]] += 1; 2564 for (i=0;i<n;i++) 2565 if (!iwork[i]) 2566 interior_dofs[n_interior_dofs++] = i; 2567 ierr = PetscFree(iwork);CHKERRQ(ierr); 2568 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2569 } 2570 if (has_null_pressures) { 2571 IS *subs; 2572 PetscInt nsubs,i,j,nl; 2573 const PetscInt *idxs; 2574 PetscScalar *array; 2575 Vec *work; 2576 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2577 2578 subs = pcbddc->local_subs; 2579 nsubs = pcbddc->n_local_subs; 2580 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2581 if (checkb) { 2582 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2583 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2584 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2585 /* work[0] = 1_p */ 2586 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2587 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2588 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2589 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2590 /* work[0] = 1_v */ 2591 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2592 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2593 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2594 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2595 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2596 } 2597 if (nsubs > 1) { 2598 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2599 for (i=0;i<nsubs;i++) { 2600 ISLocalToGlobalMapping l2g; 2601 IS t_zerodiag_subs; 2602 PetscInt nl; 2603 2604 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2605 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2606 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2607 if (nl) { 2608 PetscBool valid = PETSC_TRUE; 2609 2610 if (checkb) { 2611 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2612 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2613 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2614 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2615 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2616 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2617 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2618 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2619 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2620 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2621 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2622 for (j=0;j<n_interior_dofs;j++) { 2623 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2624 valid = PETSC_FALSE; 2625 break; 2626 } 2627 } 2628 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2629 } 2630 if (valid && nneu) { 2631 const PetscInt *idxs; 2632 PetscInt nzb; 2633 2634 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2635 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2636 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2637 if (nzb) valid = PETSC_FALSE; 2638 } 2639 if (valid && pressures) { 2640 IS t_pressure_subs; 2641 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2642 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2643 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2644 } 2645 if (valid) { 2646 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2647 pcbddc->benign_n++; 2648 } else { 2649 recompute_zerodiag = PETSC_TRUE; 2650 } 2651 } 2652 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2653 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2654 } 2655 } else { /* there's just one subdomain (or zero if they have not been detected */ 2656 PetscBool valid = PETSC_TRUE; 2657 2658 if (nneu) valid = PETSC_FALSE; 2659 if (valid && pressures) { 2660 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2661 } 2662 if (valid && checkb) { 2663 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2664 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2665 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2666 for (j=0;j<n_interior_dofs;j++) { 2667 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2668 valid = PETSC_FALSE; 2669 break; 2670 } 2671 } 2672 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2673 } 2674 if (valid) { 2675 pcbddc->benign_n = 1; 2676 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2677 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2678 zerodiag_subs[0] = zerodiag; 2679 } 2680 } 2681 if (checkb) { 2682 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2683 } 2684 } 2685 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2686 2687 if (!pcbddc->benign_n) { 2688 PetscInt n; 2689 2690 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2691 recompute_zerodiag = PETSC_FALSE; 2692 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2693 if (n) { 2694 has_null_pressures = PETSC_FALSE; 2695 have_null = PETSC_FALSE; 2696 } 2697 } 2698 2699 /* final check for null pressures */ 2700 if (zerodiag && pressures) { 2701 PetscInt nz,np; 2702 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2703 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2704 if (nz != np) have_null = PETSC_FALSE; 2705 } 2706 2707 if (recompute_zerodiag) { 2708 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2709 if (pcbddc->benign_n == 1) { 2710 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2711 zerodiag = zerodiag_subs[0]; 2712 } else { 2713 PetscInt i,nzn,*new_idxs; 2714 2715 nzn = 0; 2716 for (i=0;i<pcbddc->benign_n;i++) { 2717 PetscInt ns; 2718 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2719 nzn += ns; 2720 } 2721 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2722 nzn = 0; 2723 for (i=0;i<pcbddc->benign_n;i++) { 2724 PetscInt ns,*idxs; 2725 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2726 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2727 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2728 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2729 nzn += ns; 2730 } 2731 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2732 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2733 } 2734 have_null = PETSC_FALSE; 2735 } 2736 2737 /* Prepare matrix to compute no-net-flux */ 2738 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2739 Mat A,loc_divudotp; 2740 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2741 IS row,col,isused = NULL; 2742 PetscInt M,N,n,st,n_isused; 2743 2744 if (pressures) { 2745 isused = pressures; 2746 } else { 2747 isused = zerodiag_save; 2748 } 2749 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2750 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2751 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2752 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2753 n_isused = 0; 2754 if (isused) { 2755 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2756 } 2757 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2758 st = st-n_isused; 2759 if (n) { 2760 const PetscInt *gidxs; 2761 2762 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2764 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2765 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2766 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2767 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2768 } else { 2769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2770 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2772 } 2773 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2774 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2776 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2777 ierr = ISDestroy(&row);CHKERRQ(ierr); 2778 ierr = ISDestroy(&col);CHKERRQ(ierr); 2779 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2780 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2781 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2782 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2783 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2784 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2785 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2786 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2787 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 } 2790 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2791 2792 /* change of basis and p0 dofs */ 2793 if (has_null_pressures) { 2794 IS zerodiagc; 2795 const PetscInt *idxs,*idxsc; 2796 PetscInt i,s,*nnz; 2797 2798 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2799 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2800 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2801 /* local change of basis for pressures */ 2802 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2803 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2804 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2805 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2806 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2807 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2808 for (i=0;i<pcbddc->benign_n;i++) { 2809 PetscInt nzs,j; 2810 2811 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2812 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2813 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2814 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2815 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2816 } 2817 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2818 ierr = PetscFree(nnz);CHKERRQ(ierr); 2819 /* set identity on velocities */ 2820 for (i=0;i<n-nz;i++) { 2821 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2822 } 2823 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2824 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2825 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2826 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2827 /* set change on pressures */ 2828 for (s=0;s<pcbddc->benign_n;s++) { 2829 PetscScalar *array; 2830 PetscInt nzs; 2831 2832 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2833 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2834 for (i=0;i<nzs-1;i++) { 2835 PetscScalar vals[2]; 2836 PetscInt cols[2]; 2837 2838 cols[0] = idxs[i]; 2839 cols[1] = idxs[nzs-1]; 2840 vals[0] = 1.; 2841 vals[1] = 1.; 2842 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2845 for (i=0;i<nzs-1;i++) array[i] = -1.; 2846 array[nzs-1] = 1.; 2847 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2848 /* store local idxs for p0 */ 2849 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2850 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2851 ierr = PetscFree(array);CHKERRQ(ierr); 2852 } 2853 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 /* project if needed */ 2856 if (pcbddc->benign_change_explicit) { 2857 Mat M; 2858 2859 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2860 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2862 ierr = MatDestroy(&M);CHKERRQ(ierr); 2863 } 2864 /* store global idxs for p0 */ 2865 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2866 } 2867 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2868 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2869 2870 /* determines if the coarse solver will be singular or not */ 2871 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2872 /* determines if the problem has subdomains with 0 pressure block */ 2873 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 *zerodiaglocal = zerodiag; 2875 PetscFunctionReturn(0); 2876 } 2877 2878 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2879 { 2880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2881 PetscScalar *array; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 if (!pcbddc->benign_sf) { 2886 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2887 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2888 } 2889 if (get) { 2890 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2891 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2893 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2894 } else { 2895 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2896 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2904 { 2905 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 /* TODO: add error checking 2910 - avoid nested pop (or push) calls. 2911 - cannot push before pop. 2912 - cannot call this if pcbddc->local_mat is NULL 2913 */ 2914 if (!pcbddc->benign_n) { 2915 PetscFunctionReturn(0); 2916 } 2917 if (pop) { 2918 if (pcbddc->benign_change_explicit) { 2919 IS is_p0; 2920 MatReuse reuse; 2921 2922 /* extract B_0 */ 2923 reuse = MAT_INITIAL_MATRIX; 2924 if (pcbddc->benign_B0) { 2925 reuse = MAT_REUSE_MATRIX; 2926 } 2927 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2928 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2929 /* remove rows and cols from local problem */ 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2931 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2932 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2934 } else { 2935 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2936 PetscScalar *vals; 2937 PetscInt i,n,*idxs_ins; 2938 2939 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2940 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2941 if (!pcbddc->benign_B0) { 2942 PetscInt *nnz; 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2944 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->benign_n;i++) { 2948 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2949 nnz[i] = n - nnz[i]; 2950 } 2951 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2952 ierr = PetscFree(nnz);CHKERRQ(ierr); 2953 } 2954 2955 for (i=0;i<pcbddc->benign_n;i++) { 2956 PetscScalar *array; 2957 PetscInt *idxs,j,nz,cum; 2958 2959 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2960 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2961 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2962 for (j=0;j<nz;j++) vals[j] = 1.; 2963 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2964 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2965 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2966 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2967 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2968 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2969 cum = 0; 2970 for (j=0;j<n;j++) { 2971 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2972 vals[cum] = array[j]; 2973 idxs_ins[cum] = j; 2974 cum++; 2975 } 2976 } 2977 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2978 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2979 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2980 } 2981 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2984 } 2985 } else { /* push */ 2986 if (pcbddc->benign_change_explicit) { 2987 PetscInt i; 2988 2989 for (i=0;i<pcbddc->benign_n;i++) { 2990 PetscScalar *B0_vals; 2991 PetscInt *B0_cols,B0_ncol; 2992 2993 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2994 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2997 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2998 } 2999 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 } else { 3002 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3003 } 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3009 { 3010 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3011 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3012 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3013 PetscBLASInt *B_iwork,*B_ifail; 3014 PetscScalar *work,lwork; 3015 PetscScalar *St,*S,*eigv; 3016 PetscScalar *Sarray,*Starray; 3017 PetscReal *eigs,thresh; 3018 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3019 PetscBool allocated_S_St; 3020 #if defined(PETSC_USE_COMPLEX) 3021 PetscReal *rwork; 3022 #endif 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3027 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3028 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3029 3030 if (pcbddc->dbg_flag) { 3031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3034 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 } 3036 3037 if (pcbddc->dbg_flag) { 3038 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3039 } 3040 3041 /* max size of subsets */ 3042 mss = 0; 3043 for (i=0;i<sub_schurs->n_subs;i++) { 3044 PetscInt subset_size; 3045 3046 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3047 mss = PetscMax(mss,subset_size); 3048 } 3049 3050 /* min/max and threshold */ 3051 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3052 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3053 nmax = PetscMax(nmin,nmax); 3054 allocated_S_St = PETSC_FALSE; 3055 if (nmin) { 3056 allocated_S_St = PETSC_TRUE; 3057 } 3058 3059 /* allocate lapack workspace */ 3060 cum = cum2 = 0; 3061 maxneigs = 0; 3062 for (i=0;i<sub_schurs->n_subs;i++) { 3063 PetscInt n,subset_size; 3064 3065 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3066 n = PetscMin(subset_size,nmax); 3067 cum += subset_size; 3068 cum2 += subset_size*n; 3069 maxneigs = PetscMax(maxneigs,n); 3070 } 3071 if (mss) { 3072 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3073 PetscBLASInt B_itype = 1; 3074 PetscBLASInt B_N = mss; 3075 PetscReal zero = 0.0; 3076 PetscReal eps = 0.0; /* dlamch? */ 3077 3078 B_lwork = -1; 3079 S = NULL; 3080 St = NULL; 3081 eigs = NULL; 3082 eigv = NULL; 3083 B_iwork = NULL; 3084 B_ifail = NULL; 3085 #if defined(PETSC_USE_COMPLEX) 3086 rwork = NULL; 3087 #endif 3088 thresh = 1.0; 3089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3092 #else 3093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3094 #endif 3095 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3097 } else { 3098 /* TODO */ 3099 } 3100 } else { 3101 lwork = 0; 3102 } 3103 3104 nv = 0; 3105 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3106 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3107 } 3108 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3109 if (allocated_S_St) { 3110 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3115 #endif 3116 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3117 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3118 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3119 nv+cum,&pcbddc->adaptive_constraints_idxs, 3120 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3122 3123 maxneigs = 0; 3124 cum = cumarray = 0; 3125 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3126 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3127 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3128 const PetscInt *idxs; 3129 3130 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3131 for (cum=0;cum<nv;cum++) { 3132 pcbddc->adaptive_constraints_n[cum] = 1; 3133 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3134 pcbddc->adaptive_constraints_data[cum] = 1.0; 3135 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3136 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3137 } 3138 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3139 } 3140 3141 if (mss) { /* multilevel */ 3142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3143 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3144 } 3145 3146 thresh = pcbddc->adaptive_threshold; 3147 for (i=0;i<sub_schurs->n_subs;i++) { 3148 const PetscInt *idxs; 3149 PetscReal upper,lower; 3150 PetscInt j,subset_size,eigs_start = 0; 3151 PetscBLASInt B_N; 3152 PetscBool same_data = PETSC_FALSE; 3153 3154 if (pcbddc->use_deluxe_scaling) { 3155 upper = PETSC_MAX_REAL; 3156 lower = thresh; 3157 } else { 3158 upper = 1./thresh; 3159 lower = 0.; 3160 } 3161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3162 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3165 if (sub_schurs->is_hermitian) { 3166 PetscInt j,k; 3167 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3168 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3169 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3170 } 3171 for (j=0;j<subset_size;j++) { 3172 for (k=j;k<subset_size;k++) { 3173 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3174 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3175 } 3176 } 3177 } else { 3178 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3179 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3180 } 3181 } else { 3182 S = Sarray + cumarray; 3183 St = Starray + cumarray; 3184 } 3185 /* see if we can save some work */ 3186 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3187 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3188 } 3189 3190 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3191 B_neigs = 0; 3192 } else { 3193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3194 PetscBLASInt B_itype = 1; 3195 PetscBLASInt B_IL, B_IU; 3196 PetscReal eps = -1.0; /* dlamch? */ 3197 PetscInt nmin_s; 3198 PetscBool compute_range = PETSC_FALSE; 3199 3200 if (pcbddc->dbg_flag) { 3201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3202 } 3203 3204 compute_range = PETSC_FALSE; 3205 if (thresh > 1.+PETSC_SMALL && !same_data) { 3206 compute_range = PETSC_TRUE; 3207 } 3208 3209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3210 if (compute_range) { 3211 3212 /* ask for eigenvalues larger than thresh */ 3213 #if defined(PETSC_USE_COMPLEX) 3214 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3215 #else 3216 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3217 #endif 3218 } else if (!same_data) { 3219 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3220 B_IL = 1; 3221 #if defined(PETSC_USE_COMPLEX) 3222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3223 #else 3224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3225 #endif 3226 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3227 PetscInt k; 3228 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3229 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3230 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3231 nmin = nmax; 3232 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3233 for (k=0;k<nmax;k++) { 3234 eigs[k] = 1./PETSC_SMALL; 3235 eigv[k*(subset_size+1)] = 1.0; 3236 } 3237 } 3238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3239 if (B_ierr) { 3240 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3241 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3242 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3243 } 3244 3245 if (B_neigs > nmax) { 3246 if (pcbddc->dbg_flag) { 3247 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3248 } 3249 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3250 B_neigs = nmax; 3251 } 3252 3253 nmin_s = PetscMin(nmin,B_N); 3254 if (B_neigs < nmin_s) { 3255 PetscBLASInt B_neigs2; 3256 3257 if (pcbddc->use_deluxe_scaling) { 3258 B_IL = B_N - nmin_s + 1; 3259 B_IU = B_N - B_neigs; 3260 } else { 3261 B_IL = B_neigs + 1; 3262 B_IU = nmin_s; 3263 } 3264 if (pcbddc->dbg_flag) { 3265 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3266 } 3267 if (sub_schurs->is_hermitian) { 3268 PetscInt j,k; 3269 for (j=0;j<subset_size;j++) { 3270 for (k=j;k<subset_size;k++) { 3271 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3272 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3273 } 3274 } 3275 } else { 3276 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3277 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3278 } 3279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3280 #if defined(PETSC_USE_COMPLEX) 3281 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3282 #else 3283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3284 #endif 3285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3286 B_neigs += B_neigs2; 3287 } 3288 if (B_ierr) { 3289 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3290 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3291 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3292 } 3293 if (pcbddc->dbg_flag) { 3294 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3295 for (j=0;j<B_neigs;j++) { 3296 if (eigs[j] == 0.0) { 3297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3298 } else { 3299 if (pcbddc->use_deluxe_scaling) { 3300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3301 } else { 3302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3303 } 3304 } 3305 } 3306 } 3307 } else { 3308 /* TODO */ 3309 } 3310 } 3311 /* change the basis back to the original one */ 3312 if (sub_schurs->change) { 3313 Mat change,phi,phit; 3314 3315 if (pcbddc->dbg_flag > 2) { 3316 PetscInt ii; 3317 for (ii=0;ii<B_neigs;ii++) { 3318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3319 for (j=0;j<B_N;j++) { 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3322 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3324 #else 3325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3326 #endif 3327 } 3328 } 3329 } 3330 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3332 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3333 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3334 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3335 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3336 } 3337 maxneigs = PetscMax(B_neigs,maxneigs); 3338 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3339 if (B_neigs) { 3340 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3341 3342 if (pcbddc->dbg_flag > 1) { 3343 PetscInt ii; 3344 for (ii=0;ii<B_neigs;ii++) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3346 for (j=0;j<B_N;j++) { 3347 #if defined(PETSC_USE_COMPLEX) 3348 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3349 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3351 #else 3352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3353 #endif 3354 } 3355 } 3356 } 3357 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3358 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3359 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3360 cum++; 3361 } 3362 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3363 /* shift for next computation */ 3364 cumarray += subset_size*subset_size; 3365 } 3366 if (pcbddc->dbg_flag) { 3367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3368 } 3369 3370 if (mss) { 3371 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3372 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3373 /* destroy matrices (junk) */ 3374 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3375 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3376 } 3377 if (allocated_S_St) { 3378 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3379 } 3380 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3381 #if defined(PETSC_USE_COMPLEX) 3382 ierr = PetscFree(rwork);CHKERRQ(ierr); 3383 #endif 3384 if (pcbddc->dbg_flag) { 3385 PetscInt maxneigs_r; 3386 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3387 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3388 } 3389 PetscFunctionReturn(0); 3390 } 3391 3392 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3393 { 3394 PetscScalar *coarse_submat_vals; 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3399 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3400 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3401 3402 /* Setup local neumann solver ksp_R */ 3403 /* PCBDDCSetUpLocalScatters should be called first! */ 3404 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3405 3406 /* 3407 Setup local correction and local part of coarse basis. 3408 Gives back the dense local part of the coarse matrix in column major ordering 3409 */ 3410 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3411 3412 /* Compute total number of coarse nodes and setup coarse solver */ 3413 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3414 3415 /* free */ 3416 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3417 PetscFunctionReturn(0); 3418 } 3419 3420 PetscErrorCode PCBDDCResetCustomization(PC pc) 3421 { 3422 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3423 PetscErrorCode ierr; 3424 3425 PetscFunctionBegin; 3426 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3427 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3428 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3429 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3430 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3431 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3432 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3433 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3434 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3435 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 PetscErrorCode PCBDDCResetTopography(PC pc) 3440 { 3441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3442 PetscInt i; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3447 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3448 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3450 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3451 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3452 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3454 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3455 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3456 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3457 for (i=0;i<pcbddc->n_local_subs;i++) { 3458 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3459 } 3460 pcbddc->n_local_subs = 0; 3461 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3462 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3463 pcbddc->graphanalyzed = PETSC_FALSE; 3464 pcbddc->recompute_topography = PETSC_TRUE; 3465 PetscFunctionReturn(0); 3466 } 3467 3468 PetscErrorCode PCBDDCResetSolvers(PC pc) 3469 { 3470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3471 PetscErrorCode ierr; 3472 3473 PetscFunctionBegin; 3474 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3475 if (pcbddc->coarse_phi_B) { 3476 PetscScalar *array; 3477 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3478 ierr = PetscFree(array);CHKERRQ(ierr); 3479 } 3480 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3481 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3482 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3483 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3484 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3485 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3487 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3488 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3489 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3490 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3491 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3492 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3493 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3494 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3495 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3496 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3497 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3498 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3499 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3500 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3501 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3502 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3503 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3504 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3505 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3506 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3507 if (pcbddc->benign_zerodiag_subs) { 3508 PetscInt i; 3509 for (i=0;i<pcbddc->benign_n;i++) { 3510 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3515 PetscFunctionReturn(0); 3516 } 3517 3518 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3519 { 3520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3521 PC_IS *pcis = (PC_IS*)pc->data; 3522 VecType impVecType; 3523 PetscInt n_constraints,n_R,old_size; 3524 PetscErrorCode ierr; 3525 3526 PetscFunctionBegin; 3527 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3528 n_R = pcis->n - pcbddc->n_vertices; 3529 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3530 /* local work vectors (try to avoid unneeded work)*/ 3531 /* R nodes */ 3532 old_size = -1; 3533 if (pcbddc->vec1_R) { 3534 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3535 } 3536 if (n_R != old_size) { 3537 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3538 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3539 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3540 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3541 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3542 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3543 } 3544 /* local primal dofs */ 3545 old_size = -1; 3546 if (pcbddc->vec1_P) { 3547 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3548 } 3549 if (pcbddc->local_primal_size != old_size) { 3550 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3551 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3552 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3553 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3554 } 3555 /* local explicit constraints */ 3556 old_size = -1; 3557 if (pcbddc->vec1_C) { 3558 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3559 } 3560 if (n_constraints && n_constraints != old_size) { 3561 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3562 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3563 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3564 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3570 { 3571 PetscErrorCode ierr; 3572 /* pointers to pcis and pcbddc */ 3573 PC_IS* pcis = (PC_IS*)pc->data; 3574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3576 /* submatrices of local problem */ 3577 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3578 /* submatrices of local coarse problem */ 3579 Mat S_VV,S_CV,S_VC,S_CC; 3580 /* working matrices */ 3581 Mat C_CR; 3582 /* additional working stuff */ 3583 PC pc_R; 3584 Mat F,Brhs = NULL; 3585 Vec dummy_vec; 3586 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3587 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3588 PetscScalar *work; 3589 PetscInt *idx_V_B; 3590 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3591 PetscInt i,n_R,n_D,n_B; 3592 3593 /* some shortcuts to scalars */ 3594 PetscScalar one=1.0,m_one=-1.0; 3595 3596 PetscFunctionBegin; 3597 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3598 3599 /* Set Non-overlapping dimensions */ 3600 n_vertices = pcbddc->n_vertices; 3601 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3602 n_B = pcis->n_B; 3603 n_D = pcis->n - n_B; 3604 n_R = pcis->n - n_vertices; 3605 3606 /* vertices in boundary numbering */ 3607 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3608 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3609 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3610 3611 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3612 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3614 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3616 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3618 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3620 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3621 3622 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3623 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3624 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3625 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3626 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3627 lda_rhs = n_R; 3628 need_benign_correction = PETSC_FALSE; 3629 if (isLU || isILU || isCHOL) { 3630 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3631 } else if (sub_schurs && sub_schurs->reuse_solver) { 3632 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3633 MatFactorType type; 3634 3635 F = reuse_solver->F; 3636 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3637 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3638 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3639 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3640 } else { 3641 F = NULL; 3642 } 3643 3644 /* determine if we can use a sparse right-hand side */ 3645 sparserhs = PETSC_FALSE; 3646 if (F) { 3647 MatSolverType solver; 3648 3649 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3650 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3651 } 3652 3653 /* allocate workspace */ 3654 n = 0; 3655 if (n_constraints) { 3656 n += lda_rhs*n_constraints; 3657 } 3658 if (n_vertices) { 3659 n = PetscMax(2*lda_rhs*n_vertices,n); 3660 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3661 } 3662 if (!pcbddc->symmetric_primal) { 3663 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3664 } 3665 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3666 3667 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3668 dummy_vec = NULL; 3669 if (need_benign_correction && lda_rhs != n_R && F) { 3670 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3671 } 3672 3673 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3674 if (n_constraints) { 3675 Mat M3,C_B; 3676 IS is_aux; 3677 PetscScalar *array,*array2; 3678 3679 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3680 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3681 3682 /* Extract constraints on R nodes: C_{CR} */ 3683 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3684 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3685 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3686 3687 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3688 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3689 if (!sparserhs) { 3690 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3691 for (i=0;i<n_constraints;i++) { 3692 const PetscScalar *row_cmat_values; 3693 const PetscInt *row_cmat_indices; 3694 PetscInt size_of_constraint,j; 3695 3696 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3697 for (j=0;j<size_of_constraint;j++) { 3698 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3699 } 3700 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3701 } 3702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3703 } else { 3704 Mat tC_CR; 3705 3706 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3707 if (lda_rhs != n_R) { 3708 PetscScalar *aa; 3709 PetscInt r,*ii,*jj; 3710 PetscBool done; 3711 3712 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3713 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3714 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3715 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3716 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3717 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3718 } else { 3719 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3720 tC_CR = C_CR; 3721 } 3722 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3723 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3724 } 3725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3726 if (F) { 3727 if (need_benign_correction) { 3728 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3729 3730 /* rhs is already zero on interior dofs, no need to change the rhs */ 3731 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3732 } 3733 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3734 if (need_benign_correction) { 3735 PetscScalar *marr; 3736 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3737 3738 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3739 if (lda_rhs != n_R) { 3740 for (i=0;i<n_constraints;i++) { 3741 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3742 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3743 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3744 } 3745 } else { 3746 for (i=0;i<n_constraints;i++) { 3747 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3748 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3749 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3750 } 3751 } 3752 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3753 } 3754 } else { 3755 PetscScalar *marr; 3756 3757 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3758 for (i=0;i<n_constraints;i++) { 3759 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3760 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3761 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3763 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3764 } 3765 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3766 } 3767 if (sparserhs) { 3768 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3769 } 3770 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3771 if (!pcbddc->switch_static) { 3772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3774 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3775 for (i=0;i<n_constraints;i++) { 3776 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3777 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3781 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3782 } 3783 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3784 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3785 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3786 } else { 3787 if (lda_rhs != n_R) { 3788 IS dummy; 3789 3790 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3791 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3792 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3793 } else { 3794 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3795 pcbddc->local_auxmat2 = local_auxmat2_R; 3796 } 3797 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3798 } 3799 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3800 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3801 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3802 if (isCHOL) { 3803 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3804 } else { 3805 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3806 } 3807 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3808 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3809 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3810 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3811 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3812 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3813 } 3814 3815 /* Get submatrices from subdomain matrix */ 3816 if (n_vertices) { 3817 IS is_aux; 3818 PetscBool isseqaij; 3819 3820 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3821 IS tis; 3822 3823 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3824 ierr = ISSort(tis);CHKERRQ(ierr); 3825 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3826 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3827 } else { 3828 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3829 } 3830 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3831 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3832 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3833 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3834 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3835 } 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3837 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3838 } 3839 3840 /* Matrix of coarse basis functions (local) */ 3841 if (pcbddc->coarse_phi_B) { 3842 PetscInt on_B,on_primal,on_D=n_D; 3843 if (pcbddc->coarse_phi_D) { 3844 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3845 } 3846 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3847 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3848 PetscScalar *marray; 3849 3850 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3851 ierr = PetscFree(marray);CHKERRQ(ierr); 3852 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3853 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3854 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3855 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3856 } 3857 } 3858 3859 if (!pcbddc->coarse_phi_B) { 3860 PetscScalar *marr; 3861 3862 /* memory size */ 3863 n = n_B*pcbddc->local_primal_size; 3864 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3865 if (!pcbddc->symmetric_primal) n *= 2; 3866 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3868 marr += n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3870 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3871 marr += n_D*pcbddc->local_primal_size; 3872 } 3873 if (!pcbddc->symmetric_primal) { 3874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3875 marr += n_B*pcbddc->local_primal_size; 3876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3878 } 3879 } else { 3880 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3881 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3882 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3883 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3884 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3885 } 3886 } 3887 } 3888 3889 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3890 p0_lidx_I = NULL; 3891 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3892 const PetscInt *idxs; 3893 3894 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3895 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3896 for (i=0;i<pcbddc->benign_n;i++) { 3897 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3898 } 3899 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 } 3901 3902 /* vertices */ 3903 if (n_vertices) { 3904 PetscBool restoreavr = PETSC_FALSE; 3905 3906 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3907 3908 if (n_R) { 3909 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3910 PetscBLASInt B_N,B_one = 1; 3911 PetscScalar *x,*y; 3912 3913 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3914 if (need_benign_correction) { 3915 ISLocalToGlobalMapping RtoN; 3916 IS is_p0; 3917 PetscInt *idxs_p0,n; 3918 3919 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3920 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3921 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3922 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3923 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3925 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3926 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3927 } 3928 3929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3930 if (!sparserhs || need_benign_correction) { 3931 if (lda_rhs == n_R) { 3932 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3933 } else { 3934 PetscScalar *av,*array; 3935 const PetscInt *xadj,*adjncy; 3936 PetscInt n; 3937 PetscBool flg_row; 3938 3939 array = work+lda_rhs*n_vertices; 3940 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3941 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3942 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3943 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3944 for (i=0;i<n;i++) { 3945 PetscInt j; 3946 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3947 } 3948 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3949 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3950 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3951 } 3952 if (need_benign_correction) { 3953 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3954 PetscScalar *marr; 3955 3956 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3957 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3958 3959 | 0 0 0 | (V) 3960 L = | 0 0 -1 | (P-p0) 3961 | 0 0 -1 | (p0) 3962 3963 */ 3964 for (i=0;i<reuse_solver->benign_n;i++) { 3965 const PetscScalar *vals; 3966 const PetscInt *idxs,*idxs_zero; 3967 PetscInt n,j,nz; 3968 3969 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3970 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3971 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3972 for (j=0;j<n;j++) { 3973 PetscScalar val = vals[j]; 3974 PetscInt k,col = idxs[j]; 3975 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3976 } 3977 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3978 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3979 } 3980 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3981 } 3982 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3983 Brhs = A_RV; 3984 } else { 3985 Mat tA_RVT,A_RVT; 3986 3987 if (!pcbddc->symmetric_primal) { 3988 /* A_RV already scaled by -1 */ 3989 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3990 } else { 3991 restoreavr = PETSC_TRUE; 3992 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3993 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3994 A_RVT = A_VR; 3995 } 3996 if (lda_rhs != n_R) { 3997 PetscScalar *aa; 3998 PetscInt r,*ii,*jj; 3999 PetscBool done; 4000 4001 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4002 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4003 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4005 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4006 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4007 } else { 4008 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4009 tA_RVT = A_RVT; 4010 } 4011 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4012 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4013 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4014 } 4015 if (F) { 4016 /* need to correct the rhs */ 4017 if (need_benign_correction) { 4018 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4019 PetscScalar *marr; 4020 4021 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4022 if (lda_rhs != n_R) { 4023 for (i=0;i<n_vertices;i++) { 4024 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4025 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4026 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4027 } 4028 } else { 4029 for (i=0;i<n_vertices;i++) { 4030 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4031 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4032 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4033 } 4034 } 4035 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4036 } 4037 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4038 if (restoreavr) { 4039 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4040 } 4041 /* need to correct the solution */ 4042 if (need_benign_correction) { 4043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4044 PetscScalar *marr; 4045 4046 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4047 if (lda_rhs != n_R) { 4048 for (i=0;i<n_vertices;i++) { 4049 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4050 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4051 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4052 } 4053 } else { 4054 for (i=0;i<n_vertices;i++) { 4055 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4056 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4057 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4058 } 4059 } 4060 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4061 } 4062 } else { 4063 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4064 for (i=0;i<n_vertices;i++) { 4065 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4066 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4067 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4068 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4069 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4070 } 4071 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4072 } 4073 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4074 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4075 /* S_VV and S_CV */ 4076 if (n_constraints) { 4077 Mat B; 4078 4079 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4080 for (i=0;i<n_vertices;i++) { 4081 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4082 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4085 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4086 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4087 } 4088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4089 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4090 ierr = MatDestroy(&B);CHKERRQ(ierr); 4091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4092 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4093 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4095 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4096 ierr = MatDestroy(&B);CHKERRQ(ierr); 4097 } 4098 if (lda_rhs != n_R) { 4099 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4100 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4101 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4102 } 4103 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4104 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4105 if (need_benign_correction) { 4106 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4107 PetscScalar *marr,*sums; 4108 4109 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4110 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4111 for (i=0;i<reuse_solver->benign_n;i++) { 4112 const PetscScalar *vals; 4113 const PetscInt *idxs,*idxs_zero; 4114 PetscInt n,j,nz; 4115 4116 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4117 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4118 for (j=0;j<n_vertices;j++) { 4119 PetscInt k; 4120 sums[j] = 0.; 4121 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4122 } 4123 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4124 for (j=0;j<n;j++) { 4125 PetscScalar val = vals[j]; 4126 PetscInt k; 4127 for (k=0;k<n_vertices;k++) { 4128 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4129 } 4130 } 4131 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4132 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4133 } 4134 ierr = PetscFree(sums);CHKERRQ(ierr); 4135 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4136 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4137 } 4138 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4139 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4140 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4141 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4142 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4143 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4144 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4145 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4146 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4147 } else { 4148 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4149 } 4150 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4151 4152 /* coarse basis functions */ 4153 for (i=0;i<n_vertices;i++) { 4154 PetscScalar *y; 4155 4156 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4157 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4158 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4160 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4161 y[n_B*i+idx_V_B[i]] = 1.0; 4162 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4163 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4164 4165 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4166 PetscInt j; 4167 4168 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4169 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4170 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4171 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4172 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4173 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4174 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4175 } 4176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4177 } 4178 /* if n_R == 0 the object is not destroyed */ 4179 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4180 } 4181 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4182 4183 if (n_constraints) { 4184 Mat B; 4185 4186 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4187 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4188 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4189 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4190 if (n_vertices) { 4191 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4192 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4193 } else { 4194 Mat S_VCt; 4195 4196 if (lda_rhs != n_R) { 4197 ierr = MatDestroy(&B);CHKERRQ(ierr); 4198 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4199 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4200 } 4201 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4202 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4203 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4204 } 4205 } 4206 ierr = MatDestroy(&B);CHKERRQ(ierr); 4207 /* coarse basis functions */ 4208 for (i=0;i<n_constraints;i++) { 4209 PetscScalar *y; 4210 4211 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4212 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4213 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4214 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4215 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4216 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4217 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4219 PetscInt j; 4220 4221 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4222 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4223 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4224 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4225 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4226 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4227 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4228 } 4229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4230 } 4231 } 4232 if (n_constraints) { 4233 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4234 } 4235 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4236 4237 /* coarse matrix entries relative to B_0 */ 4238 if (pcbddc->benign_n) { 4239 Mat B0_B,B0_BPHI; 4240 IS is_dummy; 4241 PetscScalar *data; 4242 PetscInt j; 4243 4244 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4245 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4246 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4247 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4248 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4249 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4250 for (j=0;j<pcbddc->benign_n;j++) { 4251 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4252 for (i=0;i<pcbddc->local_primal_size;i++) { 4253 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4254 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4255 } 4256 } 4257 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4258 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4259 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4260 } 4261 4262 /* compute other basis functions for non-symmetric problems */ 4263 if (!pcbddc->symmetric_primal) { 4264 Mat B_V=NULL,B_C=NULL; 4265 PetscScalar *marray; 4266 4267 if (n_constraints) { 4268 Mat S_CCT,C_CRT; 4269 4270 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4271 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4272 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4273 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4274 if (n_vertices) { 4275 Mat S_VCT; 4276 4277 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4278 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4279 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4280 } 4281 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4282 } else { 4283 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4284 } 4285 if (n_vertices && n_R) { 4286 PetscScalar *av,*marray; 4287 const PetscInt *xadj,*adjncy; 4288 PetscInt n; 4289 PetscBool flg_row; 4290 4291 /* B_V = B_V - A_VR^T */ 4292 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4293 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4294 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4295 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4296 for (i=0;i<n;i++) { 4297 PetscInt j; 4298 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4299 } 4300 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4301 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4302 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4303 } 4304 4305 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4306 if (n_vertices) { 4307 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4308 for (i=0;i<n_vertices;i++) { 4309 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4310 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4311 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4312 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4313 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4314 } 4315 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4316 } 4317 if (B_C) { 4318 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4319 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4320 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4321 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4322 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4323 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4324 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4325 } 4326 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4327 } 4328 /* coarse basis functions */ 4329 for (i=0;i<pcbddc->local_primal_size;i++) { 4330 PetscScalar *y; 4331 4332 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4333 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4334 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4335 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4336 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4337 if (i<n_vertices) { 4338 y[n_B*i+idx_V_B[i]] = 1.0; 4339 } 4340 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4341 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4342 4343 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4344 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4345 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4346 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4347 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4348 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4349 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4350 } 4351 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4352 } 4353 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4354 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4355 } 4356 4357 /* free memory */ 4358 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4359 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4360 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4361 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4362 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4363 ierr = PetscFree(work);CHKERRQ(ierr); 4364 if (n_vertices) { 4365 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4366 } 4367 if (n_constraints) { 4368 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4369 } 4370 /* Checking coarse_sub_mat and coarse basis functios */ 4371 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4372 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4373 if (pcbddc->dbg_flag) { 4374 Mat coarse_sub_mat; 4375 Mat AUXMAT,TM1,TM2,TM3,TM4; 4376 Mat coarse_phi_D,coarse_phi_B; 4377 Mat coarse_psi_D,coarse_psi_B; 4378 Mat A_II,A_BB,A_IB,A_BI; 4379 Mat C_B,CPHI; 4380 IS is_dummy; 4381 Vec mones; 4382 MatType checkmattype=MATSEQAIJ; 4383 PetscReal real_value; 4384 4385 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4386 Mat A; 4387 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4388 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4389 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4390 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4391 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4392 ierr = MatDestroy(&A);CHKERRQ(ierr); 4393 } else { 4394 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4395 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4396 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4397 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4398 } 4399 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4400 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4401 if (!pcbddc->symmetric_primal) { 4402 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4403 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4404 } 4405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4406 4407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4410 if (!pcbddc->symmetric_primal) { 4411 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4412 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4413 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4414 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4415 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4416 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4417 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4418 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4419 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4420 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4421 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4422 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4423 } else { 4424 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4425 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4426 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4427 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4428 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4429 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4430 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4431 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4432 } 4433 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4434 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4435 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4436 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4437 if (pcbddc->benign_n) { 4438 Mat B0_B,B0_BPHI; 4439 PetscScalar *data,*data2; 4440 PetscInt j; 4441 4442 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4443 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4444 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4445 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4446 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4447 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4448 for (j=0;j<pcbddc->benign_n;j++) { 4449 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4450 for (i=0;i<pcbddc->local_primal_size;i++) { 4451 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4452 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4453 } 4454 } 4455 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4456 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4457 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4458 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4459 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4460 } 4461 #if 0 4462 { 4463 PetscViewer viewer; 4464 char filename[256]; 4465 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4466 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4467 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4468 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4469 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4470 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4471 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4472 if (pcbddc->coarse_phi_B) { 4473 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4474 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4475 } 4476 if (pcbddc->coarse_phi_D) { 4477 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4478 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4479 } 4480 if (pcbddc->coarse_psi_B) { 4481 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4482 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4483 } 4484 if (pcbddc->coarse_psi_D) { 4485 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4486 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4487 } 4488 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4489 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4490 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4491 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4492 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4493 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4494 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4495 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4496 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4497 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4498 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4499 } 4500 #endif 4501 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4502 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4503 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4505 4506 /* check constraints */ 4507 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4508 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4509 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4510 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4511 } else { 4512 PetscScalar *data; 4513 Mat tmat; 4514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4515 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4516 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4517 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4518 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4519 } 4520 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4521 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4522 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4523 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4525 if (!pcbddc->symmetric_primal) { 4526 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4527 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4528 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4529 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4530 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4531 } 4532 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4533 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4534 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4535 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4537 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4538 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4539 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4540 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4541 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4542 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4543 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4544 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4545 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4546 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4547 if (!pcbddc->symmetric_primal) { 4548 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4549 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4550 } 4551 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4552 } 4553 /* get back data */ 4554 *coarse_submat_vals_n = coarse_submat_vals; 4555 PetscFunctionReturn(0); 4556 } 4557 4558 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4559 { 4560 Mat *work_mat; 4561 IS isrow_s,iscol_s; 4562 PetscBool rsorted,csorted; 4563 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4568 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4569 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4570 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4571 4572 if (!rsorted) { 4573 const PetscInt *idxs; 4574 PetscInt *idxs_sorted,i; 4575 4576 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4577 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4578 for (i=0;i<rsize;i++) { 4579 idxs_perm_r[i] = i; 4580 } 4581 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4582 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4583 for (i=0;i<rsize;i++) { 4584 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4585 } 4586 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4587 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4588 } else { 4589 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4590 isrow_s = isrow; 4591 } 4592 4593 if (!csorted) { 4594 if (isrow == iscol) { 4595 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4596 iscol_s = isrow_s; 4597 } else { 4598 const PetscInt *idxs; 4599 PetscInt *idxs_sorted,i; 4600 4601 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4603 for (i=0;i<csize;i++) { 4604 idxs_perm_c[i] = i; 4605 } 4606 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4607 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4608 for (i=0;i<csize;i++) { 4609 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4610 } 4611 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4612 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4613 } 4614 } else { 4615 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4616 iscol_s = iscol; 4617 } 4618 4619 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4620 4621 if (!rsorted || !csorted) { 4622 Mat new_mat; 4623 IS is_perm_r,is_perm_c; 4624 4625 if (!rsorted) { 4626 PetscInt *idxs_r,i; 4627 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4628 for (i=0;i<rsize;i++) { 4629 idxs_r[idxs_perm_r[i]] = i; 4630 } 4631 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4632 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4633 } else { 4634 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4635 } 4636 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4637 4638 if (!csorted) { 4639 if (isrow_s == iscol_s) { 4640 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4641 is_perm_c = is_perm_r; 4642 } else { 4643 PetscInt *idxs_c,i; 4644 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4645 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4646 for (i=0;i<csize;i++) { 4647 idxs_c[idxs_perm_c[i]] = i; 4648 } 4649 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4650 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4651 } 4652 } else { 4653 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4654 } 4655 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4656 4657 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4658 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4659 work_mat[0] = new_mat; 4660 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4661 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4662 } 4663 4664 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4665 *B = work_mat[0]; 4666 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4667 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4668 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4669 PetscFunctionReturn(0); 4670 } 4671 4672 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4673 { 4674 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4675 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4676 Mat new_mat,lA; 4677 IS is_local,is_global; 4678 PetscInt local_size; 4679 PetscBool isseqaij; 4680 PetscErrorCode ierr; 4681 4682 PetscFunctionBegin; 4683 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4684 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4685 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4686 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4687 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4688 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4689 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4690 4691 /* check */ 4692 if (pcbddc->dbg_flag) { 4693 Vec x,x_change; 4694 PetscReal error; 4695 4696 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4697 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4698 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4699 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4701 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4702 if (!pcbddc->change_interior) { 4703 const PetscScalar *x,*y,*v; 4704 PetscReal lerror = 0.; 4705 PetscInt i; 4706 4707 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4708 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4709 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4710 for (i=0;i<local_size;i++) 4711 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4712 lerror = PetscAbsScalar(x[i]-y[i]); 4713 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4714 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4715 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4716 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4717 if (error > PETSC_SMALL) { 4718 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4719 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4720 } else { 4721 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4722 } 4723 } 4724 } 4725 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4726 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4727 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4728 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4729 if (error > PETSC_SMALL) { 4730 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4731 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4732 } else { 4733 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4734 } 4735 } 4736 ierr = VecDestroy(&x);CHKERRQ(ierr); 4737 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4738 } 4739 4740 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4741 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4742 4743 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4744 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4745 if (isseqaij) { 4746 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4747 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4748 if (lA) { 4749 Mat work; 4750 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4751 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4752 ierr = MatDestroy(&work);CHKERRQ(ierr); 4753 } 4754 } else { 4755 Mat work_mat; 4756 4757 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4758 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4759 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4760 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4761 if (lA) { 4762 Mat work; 4763 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4764 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4765 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4766 ierr = MatDestroy(&work);CHKERRQ(ierr); 4767 } 4768 } 4769 if (matis->A->symmetric_set) { 4770 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4771 #if !defined(PETSC_USE_COMPLEX) 4772 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4773 #endif 4774 } 4775 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4776 PetscFunctionReturn(0); 4777 } 4778 4779 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4780 { 4781 PC_IS* pcis = (PC_IS*)(pc->data); 4782 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4784 PetscInt *idx_R_local=NULL; 4785 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4786 PetscInt vbs,bs; 4787 PetscBT bitmask=NULL; 4788 PetscErrorCode ierr; 4789 4790 PetscFunctionBegin; 4791 /* 4792 No need to setup local scatters if 4793 - primal space is unchanged 4794 AND 4795 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4796 AND 4797 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4798 */ 4799 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4800 PetscFunctionReturn(0); 4801 } 4802 /* destroy old objects */ 4803 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4804 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4805 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4806 /* Set Non-overlapping dimensions */ 4807 n_B = pcis->n_B; 4808 n_D = pcis->n - n_B; 4809 n_vertices = pcbddc->n_vertices; 4810 4811 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4812 4813 /* create auxiliary bitmask and allocate workspace */ 4814 if (!sub_schurs || !sub_schurs->reuse_solver) { 4815 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4816 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4817 for (i=0;i<n_vertices;i++) { 4818 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4819 } 4820 4821 for (i=0, n_R=0; i<pcis->n; i++) { 4822 if (!PetscBTLookup(bitmask,i)) { 4823 idx_R_local[n_R++] = i; 4824 } 4825 } 4826 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4827 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4828 4829 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4830 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4831 } 4832 4833 /* Block code */ 4834 vbs = 1; 4835 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4836 if (bs>1 && !(n_vertices%bs)) { 4837 PetscBool is_blocked = PETSC_TRUE; 4838 PetscInt *vary; 4839 if (!sub_schurs || !sub_schurs->reuse_solver) { 4840 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4841 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4842 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4843 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4844 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4845 for (i=0; i<pcis->n/bs; i++) { 4846 if (vary[i]!=0 && vary[i]!=bs) { 4847 is_blocked = PETSC_FALSE; 4848 break; 4849 } 4850 } 4851 ierr = PetscFree(vary);CHKERRQ(ierr); 4852 } else { 4853 /* Verify directly the R set */ 4854 for (i=0; i<n_R/bs; i++) { 4855 PetscInt j,node=idx_R_local[bs*i]; 4856 for (j=1; j<bs; j++) { 4857 if (node != idx_R_local[bs*i+j]-j) { 4858 is_blocked = PETSC_FALSE; 4859 break; 4860 } 4861 } 4862 } 4863 } 4864 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4865 vbs = bs; 4866 for (i=0;i<n_R/vbs;i++) { 4867 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4868 } 4869 } 4870 } 4871 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4872 if (sub_schurs && sub_schurs->reuse_solver) { 4873 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4874 4875 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4876 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4877 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4878 reuse_solver->is_R = pcbddc->is_R_local; 4879 } else { 4880 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4881 } 4882 4883 /* print some info if requested */ 4884 if (pcbddc->dbg_flag) { 4885 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4886 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4887 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4892 } 4893 4894 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4895 if (!sub_schurs || !sub_schurs->reuse_solver) { 4896 IS is_aux1,is_aux2; 4897 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4898 4899 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4900 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4901 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4902 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4903 for (i=0; i<n_D; i++) { 4904 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4905 } 4906 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4907 for (i=0, j=0; i<n_R; i++) { 4908 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4909 aux_array1[j++] = i; 4910 } 4911 } 4912 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4913 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4914 for (i=0, j=0; i<n_B; i++) { 4915 if (!PetscBTLookup(bitmask,is_indices[i])) { 4916 aux_array2[j++] = i; 4917 } 4918 } 4919 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4920 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4921 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4922 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4923 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4924 4925 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4926 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4927 for (i=0, j=0; i<n_R; i++) { 4928 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4929 aux_array1[j++] = i; 4930 } 4931 } 4932 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4933 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4934 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4935 } 4936 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4937 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4938 } else { 4939 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4940 IS tis; 4941 PetscInt schur_size; 4942 4943 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4944 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4945 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4946 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4948 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4949 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4950 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4951 } 4952 } 4953 PetscFunctionReturn(0); 4954 } 4955 4956 4957 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4958 { 4959 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4960 PC_IS *pcis = (PC_IS*)pc->data; 4961 PC pc_temp; 4962 Mat A_RR; 4963 MatReuse reuse; 4964 PetscScalar m_one = -1.0; 4965 PetscReal value; 4966 PetscInt n_D,n_R; 4967 PetscBool check_corr,issbaij; 4968 PetscErrorCode ierr; 4969 /* prefixes stuff */ 4970 char dir_prefix[256],neu_prefix[256],str_level[16]; 4971 size_t len; 4972 4973 PetscFunctionBegin; 4974 4975 /* compute prefixes */ 4976 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4977 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4978 if (!pcbddc->current_level) { 4979 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4980 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4981 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4982 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4983 } else { 4984 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4985 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4986 len -= 15; /* remove "pc_bddc_coarse_" */ 4987 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4988 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4989 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4990 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4991 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4992 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4993 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4994 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4995 } 4996 4997 /* DIRICHLET PROBLEM */ 4998 if (dirichlet) { 4999 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5000 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5001 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5002 if (pcbddc->dbg_flag) { 5003 Mat A_IIn; 5004 5005 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5006 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5007 pcis->A_II = A_IIn; 5008 } 5009 } 5010 if (pcbddc->local_mat->symmetric_set) { 5011 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5012 } 5013 /* Matrix for Dirichlet problem is pcis->A_II */ 5014 n_D = pcis->n - pcis->n_B; 5015 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5016 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5017 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5018 /* default */ 5019 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5020 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5021 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5022 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5023 if (issbaij) { 5024 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5025 } else { 5026 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5027 } 5028 /* Allow user's customization */ 5029 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5030 } 5031 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5032 if (sub_schurs && sub_schurs->reuse_solver) { 5033 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5034 5035 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5036 } 5037 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5038 if (!n_D) { 5039 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5040 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5041 } 5042 /* Set Up KSP for Dirichlet problem of BDDC */ 5043 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5044 /* set ksp_D into pcis data */ 5045 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5046 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5047 pcis->ksp_D = pcbddc->ksp_D; 5048 } 5049 5050 /* NEUMANN PROBLEM */ 5051 A_RR = 0; 5052 if (neumann) { 5053 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5054 PetscInt ibs,mbs; 5055 PetscBool issbaij, reuse_neumann_solver; 5056 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5057 5058 reuse_neumann_solver = PETSC_FALSE; 5059 if (sub_schurs && sub_schurs->reuse_solver) { 5060 IS iP; 5061 5062 reuse_neumann_solver = PETSC_TRUE; 5063 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5064 if (iP) reuse_neumann_solver = PETSC_FALSE; 5065 } 5066 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5067 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5068 if (pcbddc->ksp_R) { /* already created ksp */ 5069 PetscInt nn_R; 5070 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5071 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5072 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5073 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5074 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5075 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5076 reuse = MAT_INITIAL_MATRIX; 5077 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5078 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5079 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5080 reuse = MAT_INITIAL_MATRIX; 5081 } else { /* safe to reuse the matrix */ 5082 reuse = MAT_REUSE_MATRIX; 5083 } 5084 } 5085 /* last check */ 5086 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5088 reuse = MAT_INITIAL_MATRIX; 5089 } 5090 } else { /* first time, so we need to create the matrix */ 5091 reuse = MAT_INITIAL_MATRIX; 5092 } 5093 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5094 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5095 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5096 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5097 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5098 if (matis->A == pcbddc->local_mat) { 5099 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5100 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5101 } else { 5102 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5103 } 5104 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5105 if (matis->A == pcbddc->local_mat) { 5106 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5107 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5108 } else { 5109 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5110 } 5111 } 5112 /* extract A_RR */ 5113 if (reuse_neumann_solver) { 5114 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5115 5116 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5117 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5118 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5119 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5120 } else { 5121 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5122 } 5123 } else { 5124 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5125 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5126 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5127 } 5128 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5129 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5130 } 5131 if (pcbddc->local_mat->symmetric_set) { 5132 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5133 } 5134 if (!pcbddc->ksp_R) { /* create object if not present */ 5135 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5137 /* default */ 5138 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5139 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5140 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5141 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5142 if (issbaij) { 5143 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5144 } else { 5145 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5146 } 5147 /* Allow user's customization */ 5148 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5149 } 5150 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5151 if (!n_R) { 5152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5153 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5154 } 5155 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5156 /* Reuse solver if it is present */ 5157 if (reuse_neumann_solver) { 5158 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5159 5160 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5161 } 5162 /* Set Up KSP for Neumann problem of BDDC */ 5163 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5164 } 5165 5166 if (pcbddc->dbg_flag) { 5167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5168 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5170 } 5171 5172 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5173 check_corr = PETSC_FALSE; 5174 if (pcbddc->NullSpace_corr[0]) { 5175 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5176 } 5177 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5178 check_corr = PETSC_TRUE; 5179 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5180 } 5181 if (neumann && pcbddc->NullSpace_corr[2]) { 5182 check_corr = PETSC_TRUE; 5183 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5184 } 5185 /* check Dirichlet and Neumann solvers */ 5186 if (pcbddc->dbg_flag) { 5187 if (dirichlet) { /* Dirichlet */ 5188 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5189 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5190 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5191 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5192 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5194 if (check_corr) { 5195 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5196 } 5197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5198 } 5199 if (neumann) { /* Neumann */ 5200 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5201 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5202 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5203 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5204 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5206 if (check_corr) { 5207 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5208 } 5209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5210 } 5211 } 5212 /* free Neumann problem's matrix */ 5213 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5214 PetscFunctionReturn(0); 5215 } 5216 5217 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5218 { 5219 PetscErrorCode ierr; 5220 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5221 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5222 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5223 5224 PetscFunctionBegin; 5225 if (!reuse_solver) { 5226 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5227 } 5228 if (!pcbddc->switch_static) { 5229 if (applytranspose && pcbddc->local_auxmat1) { 5230 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5231 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5232 } 5233 if (!reuse_solver) { 5234 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5235 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5236 } else { 5237 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5238 5239 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5241 } 5242 } else { 5243 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5244 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5245 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5246 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5247 if (applytranspose && pcbddc->local_auxmat1) { 5248 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5249 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5250 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5251 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5252 } 5253 } 5254 if (!reuse_solver || pcbddc->switch_static) { 5255 if (applytranspose) { 5256 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5257 } else { 5258 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5259 } 5260 } else { 5261 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5262 5263 if (applytranspose) { 5264 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5265 } else { 5266 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5267 } 5268 } 5269 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5270 if (!pcbddc->switch_static) { 5271 if (!reuse_solver) { 5272 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5273 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5274 } else { 5275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5276 5277 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5278 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5279 } 5280 if (!applytranspose && pcbddc->local_auxmat1) { 5281 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5282 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5283 } 5284 } else { 5285 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5286 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5287 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5288 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5289 if (!applytranspose && pcbddc->local_auxmat1) { 5290 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5291 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5292 } 5293 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5294 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5295 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5296 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5297 } 5298 PetscFunctionReturn(0); 5299 } 5300 5301 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5302 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5303 { 5304 PetscErrorCode ierr; 5305 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5306 PC_IS* pcis = (PC_IS*) (pc->data); 5307 const PetscScalar zero = 0.0; 5308 5309 PetscFunctionBegin; 5310 PetscBool ss = PETSC_FALSE; 5311 ierr = PetscOptionsGetBool(NULL,NULL,"-swap",&ss,NULL);CHKERRQ(ierr); 5312 if (ss) { 5313 Mat save_B = pcbddc->coarse_phi_B; 5314 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5315 pcbddc->coarse_psi_B = save_B; 5316 Mat save_D = pcbddc->coarse_phi_D; 5317 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5318 pcbddc->coarse_psi_D = save_D; 5319 } 5320 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5321 if (!pcbddc->benign_apply_coarse_only) { 5322 if (applytranspose) { 5323 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5324 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5325 } else { 5326 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5327 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5328 } 5329 } else { 5330 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5331 } 5332 5333 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5334 if (pcbddc->benign_n) { 5335 PetscScalar *array; 5336 PetscInt j; 5337 5338 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5339 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5340 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5341 } 5342 5343 /* start communications from local primal nodes to rhs of coarse solver */ 5344 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5345 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5346 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5347 5348 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5349 if (pcbddc->coarse_ksp) { 5350 Mat coarse_mat; 5351 Vec rhs,sol; 5352 MatNullSpace nullsp; 5353 PetscBool isbddc = PETSC_FALSE; 5354 5355 if (pcbddc->benign_have_null) { 5356 PC coarse_pc; 5357 5358 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5359 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5360 /* we need to propagate to coarser levels the need for a possible benign correction */ 5361 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5362 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5363 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5364 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5365 } 5366 } 5367 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5368 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5369 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5370 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5371 if (nullsp) { 5372 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5373 } 5374 if (applytranspose) { 5375 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5376 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5377 } else { 5378 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5379 PC coarse_pc; 5380 5381 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5382 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5383 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5384 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5385 } else { 5386 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5387 } 5388 } 5389 /* we don't need the benign correction at coarser levels anymore */ 5390 if (pcbddc->benign_have_null && isbddc) { 5391 PC coarse_pc; 5392 PC_BDDC* coarsepcbddc; 5393 5394 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5395 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5396 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5397 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5398 } 5399 if (nullsp) { 5400 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5401 } 5402 } 5403 5404 /* Local solution on R nodes */ 5405 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5406 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5407 } 5408 /* communications from coarse sol to local primal nodes */ 5409 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5410 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5411 5412 /* Sum contributions from the two levels */ 5413 if (!pcbddc->benign_apply_coarse_only) { 5414 if (applytranspose) { 5415 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5416 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5417 } else { 5418 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5419 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5420 } 5421 /* store p0 */ 5422 if (pcbddc->benign_n) { 5423 PetscScalar *array; 5424 PetscInt j; 5425 5426 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5427 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5428 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5429 } 5430 } else { /* expand the coarse solution */ 5431 if (applytranspose) { 5432 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5433 } else { 5434 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5435 } 5436 } 5437 if (ss) { 5438 Mat save_B = pcbddc->coarse_phi_B; 5439 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5440 pcbddc->coarse_psi_B = save_B; 5441 Mat save_D = pcbddc->coarse_phi_D; 5442 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5443 pcbddc->coarse_psi_D = save_D; 5444 } 5445 PetscFunctionReturn(0); 5446 } 5447 5448 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5449 { 5450 PetscErrorCode ierr; 5451 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5452 PetscScalar *array; 5453 Vec from,to; 5454 5455 PetscFunctionBegin; 5456 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5457 from = pcbddc->coarse_vec; 5458 to = pcbddc->vec1_P; 5459 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5460 Vec tvec; 5461 5462 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5463 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5464 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5465 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5466 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5467 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5468 } 5469 } else { /* from local to global -> put data in coarse right hand side */ 5470 from = pcbddc->vec1_P; 5471 to = pcbddc->coarse_vec; 5472 } 5473 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5474 PetscFunctionReturn(0); 5475 } 5476 5477 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5478 { 5479 PetscErrorCode ierr; 5480 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5481 PetscScalar *array; 5482 Vec from,to; 5483 5484 PetscFunctionBegin; 5485 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5486 from = pcbddc->coarse_vec; 5487 to = pcbddc->vec1_P; 5488 } else { /* from local to global -> put data in coarse right hand side */ 5489 from = pcbddc->vec1_P; 5490 to = pcbddc->coarse_vec; 5491 } 5492 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5493 if (smode == SCATTER_FORWARD) { 5494 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5495 Vec tvec; 5496 5497 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5498 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5499 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5500 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5501 } 5502 } else { 5503 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5504 ierr = VecResetArray(from);CHKERRQ(ierr); 5505 } 5506 } 5507 PetscFunctionReturn(0); 5508 } 5509 5510 /* uncomment for testing purposes */ 5511 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5512 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5513 { 5514 PetscErrorCode ierr; 5515 PC_IS* pcis = (PC_IS*)(pc->data); 5516 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5517 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5518 /* one and zero */ 5519 PetscScalar one=1.0,zero=0.0; 5520 /* space to store constraints and their local indices */ 5521 PetscScalar *constraints_data; 5522 PetscInt *constraints_idxs,*constraints_idxs_B; 5523 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5524 PetscInt *constraints_n; 5525 /* iterators */ 5526 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5527 /* BLAS integers */ 5528 PetscBLASInt lwork,lierr; 5529 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5530 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5531 /* reuse */ 5532 PetscInt olocal_primal_size,olocal_primal_size_cc; 5533 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5534 /* change of basis */ 5535 PetscBool qr_needed; 5536 PetscBT change_basis,qr_needed_idx; 5537 /* auxiliary stuff */ 5538 PetscInt *nnz,*is_indices; 5539 PetscInt ncc; 5540 /* some quantities */ 5541 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5542 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5543 5544 PetscFunctionBegin; 5545 /* Destroy Mat objects computed previously */ 5546 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5547 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5548 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5549 /* save info on constraints from previous setup (if any) */ 5550 olocal_primal_size = pcbddc->local_primal_size; 5551 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5552 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5553 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5554 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5555 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5556 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5557 5558 if (!pcbddc->adaptive_selection) { 5559 IS ISForVertices,*ISForFaces,*ISForEdges; 5560 MatNullSpace nearnullsp; 5561 const Vec *nearnullvecs; 5562 Vec *localnearnullsp; 5563 PetscScalar *array; 5564 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5565 PetscBool nnsp_has_cnst; 5566 /* LAPACK working arrays for SVD or POD */ 5567 PetscBool skip_lapack,boolforchange; 5568 PetscScalar *work; 5569 PetscReal *singular_vals; 5570 #if defined(PETSC_USE_COMPLEX) 5571 PetscReal *rwork; 5572 #endif 5573 #if defined(PETSC_MISSING_LAPACK_GESVD) 5574 PetscScalar *temp_basis,*correlation_mat; 5575 #else 5576 PetscBLASInt dummy_int=1; 5577 PetscScalar dummy_scalar=1.; 5578 #endif 5579 5580 /* Get index sets for faces, edges and vertices from graph */ 5581 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5582 /* print some info */ 5583 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5584 PetscInt nv; 5585 5586 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5587 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5588 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5591 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5592 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5594 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5595 } 5596 5597 /* free unneeded index sets */ 5598 if (!pcbddc->use_vertices) { 5599 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5600 } 5601 if (!pcbddc->use_edges) { 5602 for (i=0;i<n_ISForEdges;i++) { 5603 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5604 } 5605 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5606 n_ISForEdges = 0; 5607 } 5608 if (!pcbddc->use_faces) { 5609 for (i=0;i<n_ISForFaces;i++) { 5610 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5611 } 5612 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5613 n_ISForFaces = 0; 5614 } 5615 5616 /* check if near null space is attached to global mat */ 5617 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5618 if (nearnullsp) { 5619 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5620 /* remove any stored info */ 5621 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5622 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5623 /* store information for BDDC solver reuse */ 5624 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5625 pcbddc->onearnullspace = nearnullsp; 5626 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5627 for (i=0;i<nnsp_size;i++) { 5628 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5629 } 5630 } else { /* if near null space is not provided BDDC uses constants by default */ 5631 nnsp_size = 0; 5632 nnsp_has_cnst = PETSC_TRUE; 5633 } 5634 /* get max number of constraints on a single cc */ 5635 max_constraints = nnsp_size; 5636 if (nnsp_has_cnst) max_constraints++; 5637 5638 /* 5639 Evaluate maximum storage size needed by the procedure 5640 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5641 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5642 There can be multiple constraints per connected component 5643 */ 5644 n_vertices = 0; 5645 if (ISForVertices) { 5646 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5647 } 5648 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5649 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5650 5651 total_counts = n_ISForFaces+n_ISForEdges; 5652 total_counts *= max_constraints; 5653 total_counts += n_vertices; 5654 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5655 5656 total_counts = 0; 5657 max_size_of_constraint = 0; 5658 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5659 IS used_is; 5660 if (i<n_ISForEdges) { 5661 used_is = ISForEdges[i]; 5662 } else { 5663 used_is = ISForFaces[i-n_ISForEdges]; 5664 } 5665 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5666 total_counts += j; 5667 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5668 } 5669 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5670 5671 /* get local part of global near null space vectors */ 5672 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5673 for (k=0;k<nnsp_size;k++) { 5674 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5675 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5676 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5677 } 5678 5679 /* whether or not to skip lapack calls */ 5680 skip_lapack = PETSC_TRUE; 5681 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5682 5683 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5684 if (!skip_lapack) { 5685 PetscScalar temp_work; 5686 5687 #if defined(PETSC_MISSING_LAPACK_GESVD) 5688 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5689 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5690 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5691 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5692 #if defined(PETSC_USE_COMPLEX) 5693 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5694 #endif 5695 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5696 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5697 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5698 lwork = -1; 5699 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5700 #if !defined(PETSC_USE_COMPLEX) 5701 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5702 #else 5703 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5704 #endif 5705 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5706 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5707 #else /* on missing GESVD */ 5708 /* SVD */ 5709 PetscInt max_n,min_n; 5710 max_n = max_size_of_constraint; 5711 min_n = max_constraints; 5712 if (max_size_of_constraint < max_constraints) { 5713 min_n = max_size_of_constraint; 5714 max_n = max_constraints; 5715 } 5716 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5717 #if defined(PETSC_USE_COMPLEX) 5718 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5719 #endif 5720 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5721 lwork = -1; 5722 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5723 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5724 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5725 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5726 #if !defined(PETSC_USE_COMPLEX) 5727 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5728 #else 5729 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5730 #endif 5731 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5732 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5733 #endif /* on missing GESVD */ 5734 /* Allocate optimal workspace */ 5735 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5736 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5737 } 5738 /* Now we can loop on constraining sets */ 5739 total_counts = 0; 5740 constraints_idxs_ptr[0] = 0; 5741 constraints_data_ptr[0] = 0; 5742 /* vertices */ 5743 if (n_vertices) { 5744 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5745 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5746 for (i=0;i<n_vertices;i++) { 5747 constraints_n[total_counts] = 1; 5748 constraints_data[total_counts] = 1.0; 5749 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5750 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5751 total_counts++; 5752 } 5753 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5754 n_vertices = total_counts; 5755 } 5756 5757 /* edges and faces */ 5758 total_counts_cc = total_counts; 5759 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5760 IS used_is; 5761 PetscBool idxs_copied = PETSC_FALSE; 5762 5763 if (ncc<n_ISForEdges) { 5764 used_is = ISForEdges[ncc]; 5765 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5766 } else { 5767 used_is = ISForFaces[ncc-n_ISForEdges]; 5768 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5769 } 5770 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5771 5772 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5773 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5774 /* change of basis should not be performed on local periodic nodes */ 5775 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5776 if (nnsp_has_cnst) { 5777 PetscScalar quad_value; 5778 5779 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5780 idxs_copied = PETSC_TRUE; 5781 5782 if (!pcbddc->use_nnsp_true) { 5783 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5784 } else { 5785 quad_value = 1.0; 5786 } 5787 for (j=0;j<size_of_constraint;j++) { 5788 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5789 } 5790 temp_constraints++; 5791 total_counts++; 5792 } 5793 for (k=0;k<nnsp_size;k++) { 5794 PetscReal real_value; 5795 PetscScalar *ptr_to_data; 5796 5797 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5798 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5799 for (j=0;j<size_of_constraint;j++) { 5800 ptr_to_data[j] = array[is_indices[j]]; 5801 } 5802 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5803 /* check if array is null on the connected component */ 5804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5805 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5806 if (real_value > 0.0) { /* keep indices and values */ 5807 temp_constraints++; 5808 total_counts++; 5809 if (!idxs_copied) { 5810 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5811 idxs_copied = PETSC_TRUE; 5812 } 5813 } 5814 } 5815 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5816 valid_constraints = temp_constraints; 5817 if (!pcbddc->use_nnsp_true && temp_constraints) { 5818 if (temp_constraints == 1) { /* just normalize the constraint */ 5819 PetscScalar norm,*ptr_to_data; 5820 5821 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5822 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5823 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5824 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5825 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5826 } else { /* perform SVD */ 5827 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5828 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5829 5830 #if defined(PETSC_MISSING_LAPACK_GESVD) 5831 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5832 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5833 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5834 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5835 from that computed using LAPACKgesvd 5836 -> This is due to a different computation of eigenvectors in LAPACKheev 5837 -> The quality of the POD-computed basis will be the same */ 5838 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5839 /* Store upper triangular part of correlation matrix */ 5840 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5841 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5842 for (j=0;j<temp_constraints;j++) { 5843 for (k=0;k<j+1;k++) { 5844 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5845 } 5846 } 5847 /* compute eigenvalues and eigenvectors of correlation matrix */ 5848 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5849 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5850 #if !defined(PETSC_USE_COMPLEX) 5851 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5852 #else 5853 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5854 #endif 5855 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5856 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5857 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5858 j = 0; 5859 while (j < temp_constraints && singular_vals[j] < tol) j++; 5860 total_counts = total_counts-j; 5861 valid_constraints = temp_constraints-j; 5862 /* scale and copy POD basis into used quadrature memory */ 5863 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5864 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5865 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5867 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5868 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5869 if (j<temp_constraints) { 5870 PetscInt ii; 5871 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5872 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5873 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5874 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5875 for (k=0;k<temp_constraints-j;k++) { 5876 for (ii=0;ii<size_of_constraint;ii++) { 5877 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5878 } 5879 } 5880 } 5881 #else /* on missing GESVD */ 5882 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5883 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5884 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5885 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5886 #if !defined(PETSC_USE_COMPLEX) 5887 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5888 #else 5889 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5890 #endif 5891 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5892 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5893 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5894 k = temp_constraints; 5895 if (k > size_of_constraint) k = size_of_constraint; 5896 j = 0; 5897 while (j < k && singular_vals[k-j-1] < tol) j++; 5898 valid_constraints = k-j; 5899 total_counts = total_counts-temp_constraints+valid_constraints; 5900 #endif /* on missing GESVD */ 5901 } 5902 } 5903 /* update pointers information */ 5904 if (valid_constraints) { 5905 constraints_n[total_counts_cc] = valid_constraints; 5906 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5907 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5908 /* set change_of_basis flag */ 5909 if (boolforchange) { 5910 PetscBTSet(change_basis,total_counts_cc); 5911 } 5912 total_counts_cc++; 5913 } 5914 } 5915 /* free workspace */ 5916 if (!skip_lapack) { 5917 ierr = PetscFree(work);CHKERRQ(ierr); 5918 #if defined(PETSC_USE_COMPLEX) 5919 ierr = PetscFree(rwork);CHKERRQ(ierr); 5920 #endif 5921 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5922 #if defined(PETSC_MISSING_LAPACK_GESVD) 5923 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5924 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5925 #endif 5926 } 5927 for (k=0;k<nnsp_size;k++) { 5928 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5929 } 5930 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5931 /* free index sets of faces, edges and vertices */ 5932 for (i=0;i<n_ISForFaces;i++) { 5933 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5934 } 5935 if (n_ISForFaces) { 5936 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5937 } 5938 for (i=0;i<n_ISForEdges;i++) { 5939 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5940 } 5941 if (n_ISForEdges) { 5942 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5943 } 5944 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5945 } else { 5946 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5947 5948 total_counts = 0; 5949 n_vertices = 0; 5950 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5951 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5952 } 5953 max_constraints = 0; 5954 total_counts_cc = 0; 5955 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5956 total_counts += pcbddc->adaptive_constraints_n[i]; 5957 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5958 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5959 } 5960 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5961 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5962 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5963 constraints_data = pcbddc->adaptive_constraints_data; 5964 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5965 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5966 total_counts_cc = 0; 5967 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5968 if (pcbddc->adaptive_constraints_n[i]) { 5969 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5970 } 5971 } 5972 #if 0 5973 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5974 for (i=0;i<total_counts_cc;i++) { 5975 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5976 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5977 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5978 printf(" %d",constraints_idxs[j]); 5979 } 5980 printf("\n"); 5981 printf("number of cc: %d\n",constraints_n[i]); 5982 } 5983 for (i=0;i<n_vertices;i++) { 5984 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5985 } 5986 for (i=0;i<sub_schurs->n_subs;i++) { 5987 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5988 } 5989 #endif 5990 5991 max_size_of_constraint = 0; 5992 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5993 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5994 /* Change of basis */ 5995 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5996 if (pcbddc->use_change_of_basis) { 5997 for (i=0;i<sub_schurs->n_subs;i++) { 5998 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5999 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6000 } 6001 } 6002 } 6003 } 6004 pcbddc->local_primal_size = total_counts; 6005 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6006 6007 /* map constraints_idxs in boundary numbering */ 6008 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6009 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6010 6011 /* Create constraint matrix */ 6012 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6013 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6014 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6015 6016 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6017 /* determine if a QR strategy is needed for change of basis */ 6018 qr_needed = PETSC_FALSE; 6019 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6020 total_primal_vertices=0; 6021 pcbddc->local_primal_size_cc = 0; 6022 for (i=0;i<total_counts_cc;i++) { 6023 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6024 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6025 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6026 pcbddc->local_primal_size_cc += 1; 6027 } else if (PetscBTLookup(change_basis,i)) { 6028 for (k=0;k<constraints_n[i];k++) { 6029 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6030 } 6031 pcbddc->local_primal_size_cc += constraints_n[i]; 6032 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6033 PetscBTSet(qr_needed_idx,i); 6034 qr_needed = PETSC_TRUE; 6035 } 6036 } else { 6037 pcbddc->local_primal_size_cc += 1; 6038 } 6039 } 6040 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6041 pcbddc->n_vertices = total_primal_vertices; 6042 /* permute indices in order to have a sorted set of vertices */ 6043 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6044 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6045 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6046 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6047 6048 /* nonzero structure of constraint matrix */ 6049 /* and get reference dof for local constraints */ 6050 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6051 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6052 6053 j = total_primal_vertices; 6054 total_counts = total_primal_vertices; 6055 cum = total_primal_vertices; 6056 for (i=n_vertices;i<total_counts_cc;i++) { 6057 if (!PetscBTLookup(change_basis,i)) { 6058 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6059 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6060 cum++; 6061 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6062 for (k=0;k<constraints_n[i];k++) { 6063 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6064 nnz[j+k] = size_of_constraint; 6065 } 6066 j += constraints_n[i]; 6067 } 6068 } 6069 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6070 ierr = PetscFree(nnz);CHKERRQ(ierr); 6071 6072 /* set values in constraint matrix */ 6073 for (i=0;i<total_primal_vertices;i++) { 6074 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6075 } 6076 total_counts = total_primal_vertices; 6077 for (i=n_vertices;i<total_counts_cc;i++) { 6078 if (!PetscBTLookup(change_basis,i)) { 6079 PetscInt *cols; 6080 6081 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6082 cols = constraints_idxs+constraints_idxs_ptr[i]; 6083 for (k=0;k<constraints_n[i];k++) { 6084 PetscInt row = total_counts+k; 6085 PetscScalar *vals; 6086 6087 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6088 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6089 } 6090 total_counts += constraints_n[i]; 6091 } 6092 } 6093 /* assembling */ 6094 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6095 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6096 6097 /* 6098 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6099 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6100 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6101 */ 6102 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6103 if (pcbddc->use_change_of_basis) { 6104 /* dual and primal dofs on a single cc */ 6105 PetscInt dual_dofs,primal_dofs; 6106 /* working stuff for GEQRF */ 6107 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6108 PetscBLASInt lqr_work; 6109 /* working stuff for UNGQR */ 6110 PetscScalar *gqr_work,lgqr_work_t; 6111 PetscBLASInt lgqr_work; 6112 /* working stuff for TRTRS */ 6113 PetscScalar *trs_rhs; 6114 PetscBLASInt Blas_NRHS; 6115 /* pointers for values insertion into change of basis matrix */ 6116 PetscInt *start_rows,*start_cols; 6117 PetscScalar *start_vals; 6118 /* working stuff for values insertion */ 6119 PetscBT is_primal; 6120 PetscInt *aux_primal_numbering_B; 6121 /* matrix sizes */ 6122 PetscInt global_size,local_size; 6123 /* temporary change of basis */ 6124 Mat localChangeOfBasisMatrix; 6125 /* extra space for debugging */ 6126 PetscScalar *dbg_work; 6127 6128 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6129 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6130 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6131 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6132 /* nonzeros for local mat */ 6133 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6134 if (!pcbddc->benign_change || pcbddc->fake_change) { 6135 for (i=0;i<pcis->n;i++) nnz[i]=1; 6136 } else { 6137 const PetscInt *ii; 6138 PetscInt n; 6139 PetscBool flg_row; 6140 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6141 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6142 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6143 } 6144 for (i=n_vertices;i<total_counts_cc;i++) { 6145 if (PetscBTLookup(change_basis,i)) { 6146 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6147 if (PetscBTLookup(qr_needed_idx,i)) { 6148 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6149 } else { 6150 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6151 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6152 } 6153 } 6154 } 6155 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6156 ierr = PetscFree(nnz);CHKERRQ(ierr); 6157 /* Set interior change in the matrix */ 6158 if (!pcbddc->benign_change || pcbddc->fake_change) { 6159 for (i=0;i<pcis->n;i++) { 6160 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6161 } 6162 } else { 6163 const PetscInt *ii,*jj; 6164 PetscScalar *aa; 6165 PetscInt n; 6166 PetscBool flg_row; 6167 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6168 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6169 for (i=0;i<n;i++) { 6170 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6171 } 6172 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6173 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6174 } 6175 6176 if (pcbddc->dbg_flag) { 6177 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6178 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6179 } 6180 6181 6182 /* Now we loop on the constraints which need a change of basis */ 6183 /* 6184 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6185 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6186 6187 Basic blocks of change of basis matrix T computed by 6188 6189 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6190 6191 | 1 0 ... 0 s_1/S | 6192 | 0 1 ... 0 s_2/S | 6193 | ... | 6194 | 0 ... 1 s_{n-1}/S | 6195 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6196 6197 with S = \sum_{i=1}^n s_i^2 6198 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6199 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6200 6201 - QR decomposition of constraints otherwise 6202 */ 6203 if (qr_needed) { 6204 /* space to store Q */ 6205 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6206 /* array to store scaling factors for reflectors */ 6207 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6208 /* first we issue queries for optimal work */ 6209 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6210 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6211 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6212 lqr_work = -1; 6213 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6214 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6215 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6216 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6217 lgqr_work = -1; 6218 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6219 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6220 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6221 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6222 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6223 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6224 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6225 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6226 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6227 /* array to store rhs and solution of triangular solver */ 6228 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6229 /* allocating workspace for check */ 6230 if (pcbddc->dbg_flag) { 6231 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6232 } 6233 } 6234 /* array to store whether a node is primal or not */ 6235 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6236 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6237 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6238 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6239 for (i=0;i<total_primal_vertices;i++) { 6240 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6241 } 6242 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6243 6244 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6245 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6246 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6247 if (PetscBTLookup(change_basis,total_counts)) { 6248 /* get constraint info */ 6249 primal_dofs = constraints_n[total_counts]; 6250 dual_dofs = size_of_constraint-primal_dofs; 6251 6252 if (pcbddc->dbg_flag) { 6253 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6254 } 6255 6256 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6257 6258 /* copy quadrature constraints for change of basis check */ 6259 if (pcbddc->dbg_flag) { 6260 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6261 } 6262 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6263 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6264 6265 /* compute QR decomposition of constraints */ 6266 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6267 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6269 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6270 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6271 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6272 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6273 6274 /* explictly compute R^-T */ 6275 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6276 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6277 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6278 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6280 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6281 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6282 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6283 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6284 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6285 6286 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6289 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6290 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6291 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6292 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6293 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6294 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6295 6296 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6297 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6298 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6299 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6300 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6301 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6302 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6303 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6304 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6305 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6306 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6307 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6308 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6309 6310 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6311 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6312 /* insert cols for primal dofs */ 6313 for (j=0;j<primal_dofs;j++) { 6314 start_vals = &qr_basis[j*size_of_constraint]; 6315 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6316 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6317 } 6318 /* insert cols for dual dofs */ 6319 for (j=0,k=0;j<dual_dofs;k++) { 6320 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6321 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6322 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6323 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6324 j++; 6325 } 6326 } 6327 6328 /* check change of basis */ 6329 if (pcbddc->dbg_flag) { 6330 PetscInt ii,jj; 6331 PetscBool valid_qr=PETSC_TRUE; 6332 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6334 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6335 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6336 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6337 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6338 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6339 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6340 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6341 for (jj=0;jj<size_of_constraint;jj++) { 6342 for (ii=0;ii<primal_dofs;ii++) { 6343 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6344 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6345 } 6346 } 6347 if (!valid_qr) { 6348 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6349 for (jj=0;jj<size_of_constraint;jj++) { 6350 for (ii=0;ii<primal_dofs;ii++) { 6351 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6352 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6353 } 6354 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6355 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6356 } 6357 } 6358 } 6359 } else { 6360 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6361 } 6362 } 6363 } else { /* simple transformation block */ 6364 PetscInt row,col; 6365 PetscScalar val,norm; 6366 6367 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6368 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6369 for (j=0;j<size_of_constraint;j++) { 6370 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6371 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6372 if (!PetscBTLookup(is_primal,row_B)) { 6373 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6374 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6375 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6376 } else { 6377 for (k=0;k<size_of_constraint;k++) { 6378 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6379 if (row != col) { 6380 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6381 } else { 6382 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6383 } 6384 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6385 } 6386 } 6387 } 6388 if (pcbddc->dbg_flag) { 6389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6390 } 6391 } 6392 } else { 6393 if (pcbddc->dbg_flag) { 6394 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6395 } 6396 } 6397 } 6398 6399 /* free workspace */ 6400 if (qr_needed) { 6401 if (pcbddc->dbg_flag) { 6402 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6403 } 6404 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6405 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6406 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6407 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6408 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6409 } 6410 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6411 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6412 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6413 6414 /* assembling of global change of variable */ 6415 if (!pcbddc->fake_change) { 6416 Mat tmat; 6417 PetscInt bs; 6418 6419 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6420 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6421 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6422 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6423 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6424 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6425 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6426 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6427 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6428 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6429 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6430 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6431 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6432 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6433 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6434 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6435 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6436 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6437 6438 /* check */ 6439 if (pcbddc->dbg_flag) { 6440 PetscReal error; 6441 Vec x,x_change; 6442 6443 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6444 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6445 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6446 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6447 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6448 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6449 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6450 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6451 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6452 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6453 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6454 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6455 if (error > PETSC_SMALL) { 6456 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6457 } 6458 ierr = VecDestroy(&x);CHKERRQ(ierr); 6459 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6460 } 6461 /* adapt sub_schurs computed (if any) */ 6462 if (pcbddc->use_deluxe_scaling) { 6463 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6464 6465 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6466 if (sub_schurs && sub_schurs->S_Ej_all) { 6467 Mat S_new,tmat; 6468 IS is_all_N,is_V_Sall = NULL; 6469 6470 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6471 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6472 if (pcbddc->deluxe_zerorows) { 6473 ISLocalToGlobalMapping NtoSall; 6474 IS is_V; 6475 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6476 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6477 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6478 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6479 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6480 } 6481 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6482 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6483 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6484 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6485 if (pcbddc->deluxe_zerorows) { 6486 const PetscScalar *array; 6487 const PetscInt *idxs_V,*idxs_all; 6488 PetscInt i,n_V; 6489 6490 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6491 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6492 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6493 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6494 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6495 for (i=0;i<n_V;i++) { 6496 PetscScalar val; 6497 PetscInt idx; 6498 6499 idx = idxs_V[i]; 6500 val = array[idxs_all[idxs_V[i]]]; 6501 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6502 } 6503 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6504 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6505 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6506 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6507 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6508 } 6509 sub_schurs->S_Ej_all = S_new; 6510 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6511 if (sub_schurs->sum_S_Ej_all) { 6512 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6513 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6514 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6515 if (pcbddc->deluxe_zerorows) { 6516 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6517 } 6518 sub_schurs->sum_S_Ej_all = S_new; 6519 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6520 } 6521 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6522 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6523 } 6524 /* destroy any change of basis context in sub_schurs */ 6525 if (sub_schurs && sub_schurs->change) { 6526 PetscInt i; 6527 6528 for (i=0;i<sub_schurs->n_subs;i++) { 6529 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6530 } 6531 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6532 } 6533 } 6534 if (pcbddc->switch_static) { /* need to save the local change */ 6535 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6536 } else { 6537 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6538 } 6539 /* determine if any process has changed the pressures locally */ 6540 pcbddc->change_interior = pcbddc->benign_have_null; 6541 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6542 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6543 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6544 pcbddc->use_qr_single = qr_needed; 6545 } 6546 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6547 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6548 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6549 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6550 } else { 6551 Mat benign_global = NULL; 6552 if (pcbddc->benign_have_null) { 6553 Mat tmat; 6554 6555 pcbddc->change_interior = PETSC_TRUE; 6556 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6557 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6558 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6559 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6560 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6561 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6562 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6563 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6564 if (pcbddc->benign_change) { 6565 Mat M; 6566 6567 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6568 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6569 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6570 ierr = MatDestroy(&M);CHKERRQ(ierr); 6571 } else { 6572 Mat eye; 6573 PetscScalar *array; 6574 6575 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6576 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6577 for (i=0;i<pcis->n;i++) { 6578 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6579 } 6580 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6581 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6582 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6583 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6584 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6585 } 6586 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6587 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6588 } 6589 if (pcbddc->user_ChangeOfBasisMatrix) { 6590 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6591 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6592 } else if (pcbddc->benign_have_null) { 6593 pcbddc->ChangeOfBasisMatrix = benign_global; 6594 } 6595 } 6596 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6597 IS is_global; 6598 const PetscInt *gidxs; 6599 6600 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6601 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6602 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6603 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6604 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6605 } 6606 } 6607 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6608 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6609 } 6610 6611 if (!pcbddc->fake_change) { 6612 /* add pressure dofs to set of primal nodes for numbering purposes */ 6613 for (i=0;i<pcbddc->benign_n;i++) { 6614 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6615 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6616 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6617 pcbddc->local_primal_size_cc++; 6618 pcbddc->local_primal_size++; 6619 } 6620 6621 /* check if a new primal space has been introduced (also take into account benign trick) */ 6622 pcbddc->new_primal_space_local = PETSC_TRUE; 6623 if (olocal_primal_size == pcbddc->local_primal_size) { 6624 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6625 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6626 if (!pcbddc->new_primal_space_local) { 6627 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6628 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6629 } 6630 } 6631 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6632 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6633 } 6634 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6635 6636 /* flush dbg viewer */ 6637 if (pcbddc->dbg_flag) { 6638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6639 } 6640 6641 /* free workspace */ 6642 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6643 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6644 if (!pcbddc->adaptive_selection) { 6645 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6646 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6647 } else { 6648 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6649 pcbddc->adaptive_constraints_idxs_ptr, 6650 pcbddc->adaptive_constraints_data_ptr, 6651 pcbddc->adaptive_constraints_idxs, 6652 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6653 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6654 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6655 } 6656 PetscFunctionReturn(0); 6657 } 6658 6659 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6660 { 6661 ISLocalToGlobalMapping map; 6662 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6663 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6664 PetscInt i,N; 6665 PetscBool rcsr = PETSC_FALSE; 6666 PetscErrorCode ierr; 6667 6668 PetscFunctionBegin; 6669 if (pcbddc->recompute_topography) { 6670 pcbddc->graphanalyzed = PETSC_FALSE; 6671 /* Reset previously computed graph */ 6672 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6673 /* Init local Graph struct */ 6674 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6675 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6676 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6677 6678 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6679 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6680 } 6681 /* Check validity of the csr graph passed in by the user */ 6682 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6683 6684 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6685 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6686 PetscInt *xadj,*adjncy; 6687 PetscInt nvtxs; 6688 PetscBool flg_row=PETSC_FALSE; 6689 6690 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6691 if (flg_row) { 6692 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6693 pcbddc->computed_rowadj = PETSC_TRUE; 6694 } 6695 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6696 rcsr = PETSC_TRUE; 6697 } 6698 if (pcbddc->dbg_flag) { 6699 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6700 } 6701 6702 /* Setup of Graph */ 6703 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6704 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6705 6706 /* attach info on disconnected subdomains if present */ 6707 if (pcbddc->n_local_subs) { 6708 PetscInt *local_subs; 6709 6710 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6711 for (i=0;i<pcbddc->n_local_subs;i++) { 6712 const PetscInt *idxs; 6713 PetscInt nl,j; 6714 6715 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6716 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6717 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6718 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6719 } 6720 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6721 pcbddc->mat_graph->local_subs = local_subs; 6722 } 6723 } 6724 6725 if (!pcbddc->graphanalyzed) { 6726 /* Graph's connected components analysis */ 6727 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6728 pcbddc->graphanalyzed = PETSC_TRUE; 6729 } 6730 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6731 PetscFunctionReturn(0); 6732 } 6733 6734 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6735 { 6736 PetscInt i,j; 6737 PetscScalar *alphas; 6738 PetscErrorCode ierr; 6739 6740 PetscFunctionBegin; 6741 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6742 for (i=0;i<n;i++) { 6743 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6744 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6745 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6746 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6747 } 6748 ierr = PetscFree(alphas);CHKERRQ(ierr); 6749 PetscFunctionReturn(0); 6750 } 6751 6752 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6753 { 6754 Mat A; 6755 PetscInt n_neighs,*neighs,*n_shared,**shared; 6756 PetscMPIInt size,rank,color; 6757 PetscInt *xadj,*adjncy; 6758 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6759 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6760 PetscInt void_procs,*procs_candidates = NULL; 6761 PetscInt xadj_count,*count; 6762 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6763 PetscSubcomm psubcomm; 6764 MPI_Comm subcomm; 6765 PetscErrorCode ierr; 6766 6767 PetscFunctionBegin; 6768 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6769 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6770 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6771 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6772 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6773 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6774 6775 if (have_void) *have_void = PETSC_FALSE; 6776 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6777 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6778 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6779 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6780 im_active = !!n; 6781 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6782 void_procs = size - active_procs; 6783 /* get ranks of of non-active processes in mat communicator */ 6784 if (void_procs) { 6785 PetscInt ncand; 6786 6787 if (have_void) *have_void = PETSC_TRUE; 6788 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6789 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6790 for (i=0,ncand=0;i<size;i++) { 6791 if (!procs_candidates[i]) { 6792 procs_candidates[ncand++] = i; 6793 } 6794 } 6795 /* force n_subdomains to be not greater that the number of non-active processes */ 6796 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6797 } 6798 6799 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6800 number of subdomains requested 1 -> send to master or first candidate in voids */ 6801 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6802 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6803 PetscInt issize,isidx,dest; 6804 if (*n_subdomains == 1) dest = 0; 6805 else dest = rank; 6806 if (im_active) { 6807 issize = 1; 6808 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6809 isidx = procs_candidates[dest]; 6810 } else { 6811 isidx = dest; 6812 } 6813 } else { 6814 issize = 0; 6815 isidx = -1; 6816 } 6817 if (*n_subdomains != 1) *n_subdomains = active_procs; 6818 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6819 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6820 PetscFunctionReturn(0); 6821 } 6822 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6823 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6824 threshold = PetscMax(threshold,2); 6825 6826 /* Get info on mapping */ 6827 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6828 6829 /* build local CSR graph of subdomains' connectivity */ 6830 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6831 xadj[0] = 0; 6832 xadj[1] = PetscMax(n_neighs-1,0); 6833 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6834 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6835 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6836 for (i=1;i<n_neighs;i++) 6837 for (j=0;j<n_shared[i];j++) 6838 count[shared[i][j]] += 1; 6839 6840 xadj_count = 0; 6841 for (i=1;i<n_neighs;i++) { 6842 for (j=0;j<n_shared[i];j++) { 6843 if (count[shared[i][j]] < threshold) { 6844 adjncy[xadj_count] = neighs[i]; 6845 adjncy_wgt[xadj_count] = n_shared[i]; 6846 xadj_count++; 6847 break; 6848 } 6849 } 6850 } 6851 xadj[1] = xadj_count; 6852 ierr = PetscFree(count);CHKERRQ(ierr); 6853 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6854 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6855 6856 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6857 6858 /* Restrict work on active processes only */ 6859 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6860 if (void_procs) { 6861 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6862 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6863 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6864 subcomm = PetscSubcommChild(psubcomm); 6865 } else { 6866 psubcomm = NULL; 6867 subcomm = PetscObjectComm((PetscObject)mat); 6868 } 6869 6870 v_wgt = NULL; 6871 if (!color) { 6872 ierr = PetscFree(xadj);CHKERRQ(ierr); 6873 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6874 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6875 } else { 6876 Mat subdomain_adj; 6877 IS new_ranks,new_ranks_contig; 6878 MatPartitioning partitioner; 6879 PetscInt rstart=0,rend=0; 6880 PetscInt *is_indices,*oldranks; 6881 PetscMPIInt size; 6882 PetscBool aggregate; 6883 6884 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6885 if (void_procs) { 6886 PetscInt prank = rank; 6887 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6888 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6889 for (i=0;i<xadj[1];i++) { 6890 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6891 } 6892 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6893 } else { 6894 oldranks = NULL; 6895 } 6896 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6897 if (aggregate) { /* TODO: all this part could be made more efficient */ 6898 PetscInt lrows,row,ncols,*cols; 6899 PetscMPIInt nrank; 6900 PetscScalar *vals; 6901 6902 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6903 lrows = 0; 6904 if (nrank<redprocs) { 6905 lrows = size/redprocs; 6906 if (nrank<size%redprocs) lrows++; 6907 } 6908 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6909 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6910 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6911 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6912 row = nrank; 6913 ncols = xadj[1]-xadj[0]; 6914 cols = adjncy; 6915 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6916 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6917 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6918 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6919 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6920 ierr = PetscFree(xadj);CHKERRQ(ierr); 6921 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6922 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6923 ierr = PetscFree(vals);CHKERRQ(ierr); 6924 if (use_vwgt) { 6925 Vec v; 6926 const PetscScalar *array; 6927 PetscInt nl; 6928 6929 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6930 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6931 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6932 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6933 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6934 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6935 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6936 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6937 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6938 ierr = VecDestroy(&v);CHKERRQ(ierr); 6939 } 6940 } else { 6941 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6942 if (use_vwgt) { 6943 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6944 v_wgt[0] = n; 6945 } 6946 } 6947 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6948 6949 /* Partition */ 6950 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6951 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6952 if (v_wgt) { 6953 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6954 } 6955 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6956 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6957 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6958 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6959 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6960 6961 /* renumber new_ranks to avoid "holes" in new set of processors */ 6962 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6963 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6964 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6965 if (!aggregate) { 6966 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6967 #if defined(PETSC_USE_DEBUG) 6968 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6969 #endif 6970 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6971 } else if (oldranks) { 6972 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6973 } else { 6974 ranks_send_to_idx[0] = is_indices[0]; 6975 } 6976 } else { 6977 PetscInt idx = 0; 6978 PetscMPIInt tag; 6979 MPI_Request *reqs; 6980 6981 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6982 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6983 for (i=rstart;i<rend;i++) { 6984 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6985 } 6986 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6987 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6988 ierr = PetscFree(reqs);CHKERRQ(ierr); 6989 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6990 #if defined(PETSC_USE_DEBUG) 6991 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6992 #endif 6993 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6994 } else if (oldranks) { 6995 ranks_send_to_idx[0] = oldranks[idx]; 6996 } else { 6997 ranks_send_to_idx[0] = idx; 6998 } 6999 } 7000 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7001 /* clean up */ 7002 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7003 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7004 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7005 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7006 } 7007 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7008 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7009 7010 /* assemble parallel IS for sends */ 7011 i = 1; 7012 if (!color) i=0; 7013 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7014 PetscFunctionReturn(0); 7015 } 7016 7017 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7018 7019 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7020 { 7021 Mat local_mat; 7022 IS is_sends_internal; 7023 PetscInt rows,cols,new_local_rows; 7024 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7025 PetscBool ismatis,isdense,newisdense,destroy_mat; 7026 ISLocalToGlobalMapping l2gmap; 7027 PetscInt* l2gmap_indices; 7028 const PetscInt* is_indices; 7029 MatType new_local_type; 7030 /* buffers */ 7031 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7032 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7033 PetscInt *recv_buffer_idxs_local; 7034 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7035 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7036 /* MPI */ 7037 MPI_Comm comm,comm_n; 7038 PetscSubcomm subcomm; 7039 PetscMPIInt n_sends,n_recvs,commsize; 7040 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7041 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7042 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7043 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7044 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7045 PetscErrorCode ierr; 7046 7047 PetscFunctionBegin; 7048 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7049 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7050 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7051 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7052 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7053 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7054 PetscValidLogicalCollectiveBool(mat,reuse,6); 7055 PetscValidLogicalCollectiveInt(mat,nis,8); 7056 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7057 if (nvecs) { 7058 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7059 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7060 } 7061 /* further checks */ 7062 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7063 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7064 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7065 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7066 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7067 if (reuse && *mat_n) { 7068 PetscInt mrows,mcols,mnrows,mncols; 7069 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7070 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7071 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7072 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7073 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7074 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7075 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7076 } 7077 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7078 PetscValidLogicalCollectiveInt(mat,bs,0); 7079 7080 /* prepare IS for sending if not provided */ 7081 if (!is_sends) { 7082 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7083 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7084 } else { 7085 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7086 is_sends_internal = is_sends; 7087 } 7088 7089 /* get comm */ 7090 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7091 7092 /* compute number of sends */ 7093 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7094 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7095 7096 /* compute number of receives */ 7097 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7098 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7099 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7100 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7101 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7102 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7103 ierr = PetscFree(iflags);CHKERRQ(ierr); 7104 7105 /* restrict comm if requested */ 7106 subcomm = 0; 7107 destroy_mat = PETSC_FALSE; 7108 if (restrict_comm) { 7109 PetscMPIInt color,subcommsize; 7110 7111 color = 0; 7112 if (restrict_full) { 7113 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7114 } else { 7115 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7116 } 7117 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7118 subcommsize = commsize - subcommsize; 7119 /* check if reuse has been requested */ 7120 if (reuse) { 7121 if (*mat_n) { 7122 PetscMPIInt subcommsize2; 7123 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7124 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7125 comm_n = PetscObjectComm((PetscObject)*mat_n); 7126 } else { 7127 comm_n = PETSC_COMM_SELF; 7128 } 7129 } else { /* MAT_INITIAL_MATRIX */ 7130 PetscMPIInt rank; 7131 7132 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7133 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7134 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7135 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7136 comm_n = PetscSubcommChild(subcomm); 7137 } 7138 /* flag to destroy *mat_n if not significative */ 7139 if (color) destroy_mat = PETSC_TRUE; 7140 } else { 7141 comm_n = comm; 7142 } 7143 7144 /* prepare send/receive buffers */ 7145 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7146 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7147 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7148 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7149 if (nis) { 7150 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7151 } 7152 7153 /* Get data from local matrices */ 7154 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7155 /* TODO: See below some guidelines on how to prepare the local buffers */ 7156 /* 7157 send_buffer_vals should contain the raw values of the local matrix 7158 send_buffer_idxs should contain: 7159 - MatType_PRIVATE type 7160 - PetscInt size_of_l2gmap 7161 - PetscInt global_row_indices[size_of_l2gmap] 7162 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7163 */ 7164 else { 7165 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7166 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7167 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7168 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7169 send_buffer_idxs[1] = i; 7170 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7171 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7172 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7173 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7174 for (i=0;i<n_sends;i++) { 7175 ilengths_vals[is_indices[i]] = len*len; 7176 ilengths_idxs[is_indices[i]] = len+2; 7177 } 7178 } 7179 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7180 /* additional is (if any) */ 7181 if (nis) { 7182 PetscMPIInt psum; 7183 PetscInt j; 7184 for (j=0,psum=0;j<nis;j++) { 7185 PetscInt plen; 7186 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7187 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7188 psum += len+1; /* indices + lenght */ 7189 } 7190 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7191 for (j=0,psum=0;j<nis;j++) { 7192 PetscInt plen; 7193 const PetscInt *is_array_idxs; 7194 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7195 send_buffer_idxs_is[psum] = plen; 7196 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7197 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7198 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7199 psum += plen+1; /* indices + lenght */ 7200 } 7201 for (i=0;i<n_sends;i++) { 7202 ilengths_idxs_is[is_indices[i]] = psum; 7203 } 7204 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7205 } 7206 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7207 7208 buf_size_idxs = 0; 7209 buf_size_vals = 0; 7210 buf_size_idxs_is = 0; 7211 buf_size_vecs = 0; 7212 for (i=0;i<n_recvs;i++) { 7213 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7214 buf_size_vals += (PetscInt)olengths_vals[i]; 7215 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7216 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7217 } 7218 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7219 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7220 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7221 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7222 7223 /* get new tags for clean communications */ 7224 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7225 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7226 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7227 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7228 7229 /* allocate for requests */ 7230 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7231 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7232 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7233 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7234 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7235 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7236 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7237 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7238 7239 /* communications */ 7240 ptr_idxs = recv_buffer_idxs; 7241 ptr_vals = recv_buffer_vals; 7242 ptr_idxs_is = recv_buffer_idxs_is; 7243 ptr_vecs = recv_buffer_vecs; 7244 for (i=0;i<n_recvs;i++) { 7245 source_dest = onodes[i]; 7246 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7247 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7248 ptr_idxs += olengths_idxs[i]; 7249 ptr_vals += olengths_vals[i]; 7250 if (nis) { 7251 source_dest = onodes_is[i]; 7252 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7253 ptr_idxs_is += olengths_idxs_is[i]; 7254 } 7255 if (nvecs) { 7256 source_dest = onodes[i]; 7257 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7258 ptr_vecs += olengths_idxs[i]-2; 7259 } 7260 } 7261 for (i=0;i<n_sends;i++) { 7262 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7263 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7264 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7265 if (nis) { 7266 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7267 } 7268 if (nvecs) { 7269 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7270 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7271 } 7272 } 7273 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7274 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7275 7276 /* assemble new l2g map */ 7277 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7278 ptr_idxs = recv_buffer_idxs; 7279 new_local_rows = 0; 7280 for (i=0;i<n_recvs;i++) { 7281 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7282 ptr_idxs += olengths_idxs[i]; 7283 } 7284 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7285 ptr_idxs = recv_buffer_idxs; 7286 new_local_rows = 0; 7287 for (i=0;i<n_recvs;i++) { 7288 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7289 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7290 ptr_idxs += olengths_idxs[i]; 7291 } 7292 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7293 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7294 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7295 7296 /* infer new local matrix type from received local matrices type */ 7297 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7298 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7299 if (n_recvs) { 7300 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7301 ptr_idxs = recv_buffer_idxs; 7302 for (i=0;i<n_recvs;i++) { 7303 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7304 new_local_type_private = MATAIJ_PRIVATE; 7305 break; 7306 } 7307 ptr_idxs += olengths_idxs[i]; 7308 } 7309 switch (new_local_type_private) { 7310 case MATDENSE_PRIVATE: 7311 new_local_type = MATSEQAIJ; 7312 bs = 1; 7313 break; 7314 case MATAIJ_PRIVATE: 7315 new_local_type = MATSEQAIJ; 7316 bs = 1; 7317 break; 7318 case MATBAIJ_PRIVATE: 7319 new_local_type = MATSEQBAIJ; 7320 break; 7321 case MATSBAIJ_PRIVATE: 7322 new_local_type = MATSEQSBAIJ; 7323 break; 7324 default: 7325 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7326 break; 7327 } 7328 } else { /* by default, new_local_type is seqaij */ 7329 new_local_type = MATSEQAIJ; 7330 bs = 1; 7331 } 7332 7333 /* create MATIS object if needed */ 7334 if (!reuse) { 7335 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7336 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7337 } else { 7338 /* it also destroys the local matrices */ 7339 if (*mat_n) { 7340 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7341 } else { /* this is a fake object */ 7342 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7343 } 7344 } 7345 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7346 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7347 7348 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7349 7350 /* Global to local map of received indices */ 7351 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7352 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7353 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7354 7355 /* restore attributes -> type of incoming data and its size */ 7356 buf_size_idxs = 0; 7357 for (i=0;i<n_recvs;i++) { 7358 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7359 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7360 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7361 } 7362 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7363 7364 /* set preallocation */ 7365 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7366 if (!newisdense) { 7367 PetscInt *new_local_nnz=0; 7368 7369 ptr_idxs = recv_buffer_idxs_local; 7370 if (n_recvs) { 7371 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7372 } 7373 for (i=0;i<n_recvs;i++) { 7374 PetscInt j; 7375 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7376 for (j=0;j<*(ptr_idxs+1);j++) { 7377 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7378 } 7379 } else { 7380 /* TODO */ 7381 } 7382 ptr_idxs += olengths_idxs[i]; 7383 } 7384 if (new_local_nnz) { 7385 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7386 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7387 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7388 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7389 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7390 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7391 } else { 7392 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7393 } 7394 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7395 } else { 7396 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7397 } 7398 7399 /* set values */ 7400 ptr_vals = recv_buffer_vals; 7401 ptr_idxs = recv_buffer_idxs_local; 7402 for (i=0;i<n_recvs;i++) { 7403 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7404 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7405 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7406 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7407 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7408 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7409 } else { 7410 /* TODO */ 7411 } 7412 ptr_idxs += olengths_idxs[i]; 7413 ptr_vals += olengths_vals[i]; 7414 } 7415 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7416 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7417 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7418 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7419 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7420 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7421 7422 #if 0 7423 if (!restrict_comm) { /* check */ 7424 Vec lvec,rvec; 7425 PetscReal infty_error; 7426 7427 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7428 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7429 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7430 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7431 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7432 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7433 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7434 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7435 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7436 } 7437 #endif 7438 7439 /* assemble new additional is (if any) */ 7440 if (nis) { 7441 PetscInt **temp_idxs,*count_is,j,psum; 7442 7443 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7444 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7445 ptr_idxs = recv_buffer_idxs_is; 7446 psum = 0; 7447 for (i=0;i<n_recvs;i++) { 7448 for (j=0;j<nis;j++) { 7449 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7450 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7451 psum += plen; 7452 ptr_idxs += plen+1; /* shift pointer to received data */ 7453 } 7454 } 7455 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7456 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7457 for (i=1;i<nis;i++) { 7458 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7459 } 7460 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7461 ptr_idxs = recv_buffer_idxs_is; 7462 for (i=0;i<n_recvs;i++) { 7463 for (j=0;j<nis;j++) { 7464 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7465 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7466 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7467 ptr_idxs += plen+1; /* shift pointer to received data */ 7468 } 7469 } 7470 for (i=0;i<nis;i++) { 7471 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7472 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7473 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7474 } 7475 ierr = PetscFree(count_is);CHKERRQ(ierr); 7476 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7477 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7478 } 7479 /* free workspace */ 7480 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7481 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7482 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7483 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7484 if (isdense) { 7485 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7486 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7487 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7488 } else { 7489 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7490 } 7491 if (nis) { 7492 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7493 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7494 } 7495 7496 if (nvecs) { 7497 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7498 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7499 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7500 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7501 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7502 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7503 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7504 /* set values */ 7505 ptr_vals = recv_buffer_vecs; 7506 ptr_idxs = recv_buffer_idxs_local; 7507 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7508 for (i=0;i<n_recvs;i++) { 7509 PetscInt j; 7510 for (j=0;j<*(ptr_idxs+1);j++) { 7511 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7512 } 7513 ptr_idxs += olengths_idxs[i]; 7514 ptr_vals += olengths_idxs[i]-2; 7515 } 7516 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7517 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7518 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7519 } 7520 7521 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7522 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7523 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7524 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7525 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7526 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7527 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7528 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7529 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7530 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7531 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7532 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7533 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7534 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7535 ierr = PetscFree(onodes);CHKERRQ(ierr); 7536 if (nis) { 7537 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7538 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7539 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7540 } 7541 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7542 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7543 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7544 for (i=0;i<nis;i++) { 7545 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7546 } 7547 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7548 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7549 } 7550 *mat_n = NULL; 7551 } 7552 PetscFunctionReturn(0); 7553 } 7554 7555 /* temporary hack into ksp private data structure */ 7556 #include <petsc/private/kspimpl.h> 7557 7558 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7559 { 7560 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7561 PC_IS *pcis = (PC_IS*)pc->data; 7562 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7563 Mat coarsedivudotp = NULL; 7564 Mat coarseG,t_coarse_mat_is; 7565 MatNullSpace CoarseNullSpace = NULL; 7566 ISLocalToGlobalMapping coarse_islg; 7567 IS coarse_is,*isarray; 7568 PetscInt i,im_active=-1,active_procs=-1; 7569 PetscInt nis,nisdofs,nisneu,nisvert; 7570 PC pc_temp; 7571 PCType coarse_pc_type; 7572 KSPType coarse_ksp_type; 7573 PetscBool multilevel_requested,multilevel_allowed; 7574 PetscBool coarse_reuse; 7575 PetscInt ncoarse,nedcfield; 7576 PetscBool compute_vecs = PETSC_FALSE; 7577 PetscScalar *array; 7578 MatReuse coarse_mat_reuse; 7579 PetscBool restr, full_restr, have_void; 7580 PetscMPIInt commsize; 7581 PetscErrorCode ierr; 7582 7583 PetscFunctionBegin; 7584 /* Assign global numbering to coarse dofs */ 7585 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7586 PetscInt ocoarse_size; 7587 compute_vecs = PETSC_TRUE; 7588 7589 pcbddc->new_primal_space = PETSC_TRUE; 7590 ocoarse_size = pcbddc->coarse_size; 7591 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7592 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7593 /* see if we can avoid some work */ 7594 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7595 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7596 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7597 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7598 coarse_reuse = PETSC_FALSE; 7599 } else { /* we can safely reuse already computed coarse matrix */ 7600 coarse_reuse = PETSC_TRUE; 7601 } 7602 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7603 coarse_reuse = PETSC_FALSE; 7604 } 7605 /* reset any subassembling information */ 7606 if (!coarse_reuse || pcbddc->recompute_topography) { 7607 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7608 } 7609 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7610 coarse_reuse = PETSC_TRUE; 7611 } 7612 /* assemble coarse matrix */ 7613 if (coarse_reuse && pcbddc->coarse_ksp) { 7614 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7615 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7616 coarse_mat_reuse = MAT_REUSE_MATRIX; 7617 } else { 7618 coarse_mat = NULL; 7619 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7620 } 7621 7622 /* creates temporary l2gmap and IS for coarse indexes */ 7623 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7624 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7625 7626 /* creates temporary MATIS object for coarse matrix */ 7627 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7628 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7629 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7630 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7631 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7632 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7633 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7634 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7635 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7636 7637 /* count "active" (i.e. with positive local size) and "void" processes */ 7638 im_active = !!(pcis->n); 7639 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7640 7641 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7642 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7643 /* full_restr : just use the receivers from the subassembling pattern */ 7644 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7645 coarse_mat_is = NULL; 7646 multilevel_allowed = PETSC_FALSE; 7647 multilevel_requested = PETSC_FALSE; 7648 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7649 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7650 if (multilevel_requested) { 7651 ncoarse = active_procs/pcbddc->coarsening_ratio; 7652 restr = PETSC_FALSE; 7653 full_restr = PETSC_FALSE; 7654 } else { 7655 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7656 restr = PETSC_TRUE; 7657 full_restr = PETSC_TRUE; 7658 } 7659 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7660 ncoarse = PetscMax(1,ncoarse); 7661 if (!pcbddc->coarse_subassembling) { 7662 if (pcbddc->coarsening_ratio > 1) { 7663 if (multilevel_requested) { 7664 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7665 } else { 7666 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7667 } 7668 } else { 7669 PetscMPIInt rank; 7670 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7671 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7672 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7673 } 7674 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7675 PetscInt psum; 7676 if (pcbddc->coarse_ksp) psum = 1; 7677 else psum = 0; 7678 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7679 if (ncoarse < commsize) have_void = PETSC_TRUE; 7680 } 7681 /* determine if we can go multilevel */ 7682 if (multilevel_requested) { 7683 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7684 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7685 } 7686 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7687 7688 /* dump subassembling pattern */ 7689 if (pcbddc->dbg_flag && multilevel_allowed) { 7690 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7691 } 7692 7693 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7694 nedcfield = -1; 7695 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7696 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7697 const PetscInt *idxs; 7698 ISLocalToGlobalMapping tmap; 7699 7700 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7701 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7702 /* allocate space for temporary storage */ 7703 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7704 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7705 /* allocate for IS array */ 7706 nisdofs = pcbddc->n_ISForDofsLocal; 7707 if (pcbddc->nedclocal) { 7708 if (pcbddc->nedfield > -1) { 7709 nedcfield = pcbddc->nedfield; 7710 } else { 7711 nedcfield = 0; 7712 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7713 nisdofs = 1; 7714 } 7715 } 7716 nisneu = !!pcbddc->NeumannBoundariesLocal; 7717 nisvert = 0; /* nisvert is not used */ 7718 nis = nisdofs + nisneu + nisvert; 7719 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7720 /* dofs splitting */ 7721 for (i=0;i<nisdofs;i++) { 7722 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7723 if (nedcfield != i) { 7724 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7725 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7726 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7727 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7728 } else { 7729 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7730 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7731 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7732 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7733 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7734 } 7735 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7736 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7737 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7738 } 7739 /* neumann boundaries */ 7740 if (pcbddc->NeumannBoundariesLocal) { 7741 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7742 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7743 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7744 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7745 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7746 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7747 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7748 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7749 } 7750 /* free memory */ 7751 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7752 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7753 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7754 } else { 7755 nis = 0; 7756 nisdofs = 0; 7757 nisneu = 0; 7758 nisvert = 0; 7759 isarray = NULL; 7760 } 7761 /* destroy no longer needed map */ 7762 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7763 7764 /* subassemble */ 7765 if (multilevel_allowed) { 7766 Vec vp[1]; 7767 PetscInt nvecs = 0; 7768 PetscBool reuse,reuser; 7769 7770 if (coarse_mat) reuse = PETSC_TRUE; 7771 else reuse = PETSC_FALSE; 7772 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7773 vp[0] = NULL; 7774 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7775 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7776 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7777 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7778 nvecs = 1; 7779 7780 if (pcbddc->divudotp) { 7781 Mat B,loc_divudotp; 7782 Vec v,p; 7783 IS dummy; 7784 PetscInt np; 7785 7786 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7787 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7788 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7789 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7790 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7791 ierr = VecSet(p,1.);CHKERRQ(ierr); 7792 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7793 ierr = VecDestroy(&p);CHKERRQ(ierr); 7794 ierr = MatDestroy(&B);CHKERRQ(ierr); 7795 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7796 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7797 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7798 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7799 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7800 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7801 ierr = VecDestroy(&v);CHKERRQ(ierr); 7802 } 7803 } 7804 if (reuser) { 7805 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7806 } else { 7807 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7808 } 7809 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7810 PetscScalar *arraym,*arrayv; 7811 PetscInt nl; 7812 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7813 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7814 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7815 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7816 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7817 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7818 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7819 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7820 } else { 7821 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7822 } 7823 } else { 7824 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7825 } 7826 if (coarse_mat_is || coarse_mat) { 7827 PetscMPIInt size; 7828 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7829 if (!multilevel_allowed) { 7830 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7831 } else { 7832 Mat A; 7833 7834 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7835 if (coarse_mat_is) { 7836 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7837 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7838 coarse_mat = coarse_mat_is; 7839 } 7840 /* be sure we don't have MatSeqDENSE as local mat */ 7841 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7842 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7843 } 7844 } 7845 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7846 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7847 7848 /* create local to global scatters for coarse problem */ 7849 if (compute_vecs) { 7850 PetscInt lrows; 7851 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7852 if (coarse_mat) { 7853 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7854 } else { 7855 lrows = 0; 7856 } 7857 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7858 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7859 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7860 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7861 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7862 } 7863 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7864 7865 /* set defaults for coarse KSP and PC */ 7866 if (multilevel_allowed) { 7867 coarse_ksp_type = KSPRICHARDSON; 7868 coarse_pc_type = PCBDDC; 7869 } else { 7870 coarse_ksp_type = KSPPREONLY; 7871 coarse_pc_type = PCREDUNDANT; 7872 } 7873 7874 /* print some info if requested */ 7875 if (pcbddc->dbg_flag) { 7876 if (!multilevel_allowed) { 7877 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7878 if (multilevel_requested) { 7879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7880 } else if (pcbddc->max_levels) { 7881 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7882 } 7883 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7884 } 7885 } 7886 7887 /* communicate coarse discrete gradient */ 7888 coarseG = NULL; 7889 if (pcbddc->nedcG && multilevel_allowed) { 7890 MPI_Comm ccomm; 7891 if (coarse_mat) { 7892 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7893 } else { 7894 ccomm = MPI_COMM_NULL; 7895 } 7896 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7897 } 7898 7899 /* create the coarse KSP object only once with defaults */ 7900 if (coarse_mat) { 7901 PetscBool isredundant,isnn,isbddc; 7902 PetscViewer dbg_viewer = NULL; 7903 7904 if (pcbddc->dbg_flag) { 7905 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7906 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7907 } 7908 if (!pcbddc->coarse_ksp) { 7909 char prefix[256],str_level[16]; 7910 size_t len; 7911 7912 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7913 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7914 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7915 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7916 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7917 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7918 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7919 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7920 /* TODO is this logic correct? should check for coarse_mat type */ 7921 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7922 /* prefix */ 7923 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7924 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7925 if (!pcbddc->current_level) { 7926 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7927 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7928 } else { 7929 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7930 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7931 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7932 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7933 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7934 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7935 } 7936 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7937 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7938 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7939 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7940 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7941 /* allow user customization */ 7942 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7943 } 7944 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7945 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7946 if (nisdofs) { 7947 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7948 for (i=0;i<nisdofs;i++) { 7949 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7950 } 7951 } 7952 if (nisneu) { 7953 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7954 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7955 } 7956 if (nisvert) { 7957 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7958 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7959 } 7960 if (coarseG) { 7961 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7962 } 7963 7964 /* get some info after set from options */ 7965 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7966 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7967 if (isbddc && !multilevel_allowed) { 7968 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7969 isbddc = PETSC_FALSE; 7970 } 7971 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7972 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7973 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7974 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7975 isbddc = PETSC_TRUE; 7976 } 7977 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7978 if (isredundant) { 7979 KSP inner_ksp; 7980 PC inner_pc; 7981 7982 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7983 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7984 } 7985 7986 /* parameters which miss an API */ 7987 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7988 if (isbddc) { 7989 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7990 7991 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7992 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7993 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7994 if (pcbddc_coarse->benign_saddle_point) { 7995 Mat coarsedivudotp_is; 7996 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7997 IS row,col; 7998 const PetscInt *gidxs; 7999 PetscInt n,st,M,N; 8000 8001 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8002 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8003 st = st-n; 8004 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8005 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8006 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8007 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8008 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8009 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8010 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8011 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8012 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8013 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8014 ierr = ISDestroy(&row);CHKERRQ(ierr); 8015 ierr = ISDestroy(&col);CHKERRQ(ierr); 8016 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8017 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8018 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8019 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8020 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8021 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8022 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8023 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8024 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8025 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8026 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8027 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8028 } 8029 } 8030 8031 /* propagate symmetry info of coarse matrix */ 8032 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8033 if (pc->pmat->symmetric_set) { 8034 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8035 } 8036 if (pc->pmat->hermitian_set) { 8037 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8038 } 8039 if (pc->pmat->spd_set) { 8040 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8041 } 8042 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8043 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8044 } 8045 /* set operators */ 8046 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8047 if (pcbddc->dbg_flag) { 8048 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8049 } 8050 } 8051 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8052 ierr = PetscFree(isarray);CHKERRQ(ierr); 8053 #if 0 8054 { 8055 PetscViewer viewer; 8056 char filename[256]; 8057 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8058 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8059 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8060 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8061 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8062 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8063 } 8064 #endif 8065 8066 if (pcbddc->coarse_ksp) { 8067 Vec crhs,csol; 8068 8069 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8070 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8071 if (!csol) { 8072 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8073 } 8074 if (!crhs) { 8075 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8076 } 8077 } 8078 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8079 8080 /* compute null space for coarse solver if the benign trick has been requested */ 8081 if (pcbddc->benign_null) { 8082 8083 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8084 for (i=0;i<pcbddc->benign_n;i++) { 8085 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8086 } 8087 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8088 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8089 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8090 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8091 if (coarse_mat) { 8092 Vec nullv; 8093 PetscScalar *array,*array2; 8094 PetscInt nl; 8095 8096 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8097 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8098 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8099 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8100 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8101 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8102 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8103 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8104 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8105 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8106 } 8107 } 8108 8109 if (pcbddc->coarse_ksp) { 8110 PetscBool ispreonly; 8111 8112 if (CoarseNullSpace) { 8113 PetscBool isnull; 8114 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8115 if (isnull) { 8116 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8117 } 8118 /* TODO: add local nullspaces (if any) */ 8119 } 8120 /* setup coarse ksp */ 8121 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8122 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8123 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8124 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8125 KSP check_ksp; 8126 KSPType check_ksp_type; 8127 PC check_pc; 8128 Vec check_vec,coarse_vec; 8129 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8130 PetscInt its; 8131 PetscBool compute_eigs; 8132 PetscReal *eigs_r,*eigs_c; 8133 PetscInt neigs; 8134 const char *prefix; 8135 8136 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8137 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8138 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8139 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8140 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8141 /* prevent from setup unneeded object */ 8142 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8143 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8144 if (ispreonly) { 8145 check_ksp_type = KSPPREONLY; 8146 compute_eigs = PETSC_FALSE; 8147 } else { 8148 check_ksp_type = KSPGMRES; 8149 compute_eigs = PETSC_TRUE; 8150 } 8151 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8152 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8153 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8154 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8155 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8156 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8157 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8158 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8159 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8160 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8161 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8162 /* create random vec */ 8163 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8164 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8165 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8166 /* solve coarse problem */ 8167 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8168 /* set eigenvalue estimation if preonly has not been requested */ 8169 if (compute_eigs) { 8170 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8171 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8172 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8173 if (neigs) { 8174 lambda_max = eigs_r[neigs-1]; 8175 lambda_min = eigs_r[0]; 8176 if (pcbddc->use_coarse_estimates) { 8177 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8178 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8179 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8180 } 8181 } 8182 } 8183 } 8184 8185 /* check coarse problem residual error */ 8186 if (pcbddc->dbg_flag) { 8187 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8188 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8189 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8190 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8191 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8192 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8193 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8194 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8195 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8196 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8197 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8198 if (CoarseNullSpace) { 8199 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8200 } 8201 if (compute_eigs) { 8202 PetscReal lambda_max_s,lambda_min_s; 8203 KSPConvergedReason reason; 8204 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8205 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8206 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8207 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8208 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8209 for (i=0;i<neigs;i++) { 8210 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8211 } 8212 } 8213 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8214 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8215 } 8216 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8217 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8218 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8219 if (compute_eigs) { 8220 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8221 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8222 } 8223 } 8224 } 8225 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8226 /* print additional info */ 8227 if (pcbddc->dbg_flag) { 8228 /* waits until all processes reaches this point */ 8229 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8230 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8231 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8232 } 8233 8234 /* free memory */ 8235 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8236 PetscFunctionReturn(0); 8237 } 8238 8239 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8240 { 8241 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8242 PC_IS* pcis = (PC_IS*)pc->data; 8243 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8244 IS subset,subset_mult,subset_n; 8245 PetscInt local_size,coarse_size=0; 8246 PetscInt *local_primal_indices=NULL; 8247 const PetscInt *t_local_primal_indices; 8248 PetscErrorCode ierr; 8249 8250 PetscFunctionBegin; 8251 /* Compute global number of coarse dofs */ 8252 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8253 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8254 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8255 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8256 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8257 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8258 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8259 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8260 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8261 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8262 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8263 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8264 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8265 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8266 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8267 8268 /* check numbering */ 8269 if (pcbddc->dbg_flag) { 8270 PetscScalar coarsesum,*array,*array2; 8271 PetscInt i; 8272 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8273 8274 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8275 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8276 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8277 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8278 /* counter */ 8279 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8280 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8281 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8282 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8283 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8284 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8285 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8286 for (i=0;i<pcbddc->local_primal_size;i++) { 8287 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8288 } 8289 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8290 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8291 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8292 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8293 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8294 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8295 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8296 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8297 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8298 for (i=0;i<pcis->n;i++) { 8299 if (array[i] != 0.0 && array[i] != array2[i]) { 8300 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8301 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8302 set_error = PETSC_TRUE; 8303 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8304 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8305 } 8306 } 8307 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8308 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8309 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8310 for (i=0;i<pcis->n;i++) { 8311 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8312 } 8313 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8314 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8315 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8316 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8317 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8318 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8319 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8320 PetscInt *gidxs; 8321 8322 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8323 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8324 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8325 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8326 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8327 for (i=0;i<pcbddc->local_primal_size;i++) { 8328 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8329 } 8330 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8331 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8332 } 8333 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8334 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8335 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8336 } 8337 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8338 /* get back data */ 8339 *coarse_size_n = coarse_size; 8340 *local_primal_indices_n = local_primal_indices; 8341 PetscFunctionReturn(0); 8342 } 8343 8344 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8345 { 8346 IS localis_t; 8347 PetscInt i,lsize,*idxs,n; 8348 PetscScalar *vals; 8349 PetscErrorCode ierr; 8350 8351 PetscFunctionBegin; 8352 /* get indices in local ordering exploiting local to global map */ 8353 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8354 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8355 for (i=0;i<lsize;i++) vals[i] = 1.0; 8356 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8357 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8358 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8359 if (idxs) { /* multilevel guard */ 8360 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8361 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8362 } 8363 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8364 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8365 ierr = PetscFree(vals);CHKERRQ(ierr); 8366 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8367 /* now compute set in local ordering */ 8368 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8369 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8370 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8371 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8372 for (i=0,lsize=0;i<n;i++) { 8373 if (PetscRealPart(vals[i]) > 0.5) { 8374 lsize++; 8375 } 8376 } 8377 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8378 for (i=0,lsize=0;i<n;i++) { 8379 if (PetscRealPart(vals[i]) > 0.5) { 8380 idxs[lsize++] = i; 8381 } 8382 } 8383 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8384 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8385 *localis = localis_t; 8386 PetscFunctionReturn(0); 8387 } 8388 8389 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8390 { 8391 PC_IS *pcis=(PC_IS*)pc->data; 8392 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8393 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8394 Mat S_j; 8395 PetscInt *used_xadj,*used_adjncy; 8396 PetscBool free_used_adj; 8397 PetscErrorCode ierr; 8398 8399 PetscFunctionBegin; 8400 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8401 free_used_adj = PETSC_FALSE; 8402 if (pcbddc->sub_schurs_layers == -1) { 8403 used_xadj = NULL; 8404 used_adjncy = NULL; 8405 } else { 8406 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8407 used_xadj = pcbddc->mat_graph->xadj; 8408 used_adjncy = pcbddc->mat_graph->adjncy; 8409 } else if (pcbddc->computed_rowadj) { 8410 used_xadj = pcbddc->mat_graph->xadj; 8411 used_adjncy = pcbddc->mat_graph->adjncy; 8412 } else { 8413 PetscBool flg_row=PETSC_FALSE; 8414 const PetscInt *xadj,*adjncy; 8415 PetscInt nvtxs; 8416 8417 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8418 if (flg_row) { 8419 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8420 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8421 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8422 free_used_adj = PETSC_TRUE; 8423 } else { 8424 pcbddc->sub_schurs_layers = -1; 8425 used_xadj = NULL; 8426 used_adjncy = NULL; 8427 } 8428 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8429 } 8430 } 8431 8432 /* setup sub_schurs data */ 8433 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8434 if (!sub_schurs->schur_explicit) { 8435 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8436 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8437 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8438 } else { 8439 Mat change = NULL; 8440 Vec scaling = NULL; 8441 IS change_primal = NULL, iP; 8442 PetscInt benign_n; 8443 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8444 PetscBool isseqaij,need_change = PETSC_FALSE; 8445 PetscBool discrete_harmonic = PETSC_FALSE; 8446 8447 if (!pcbddc->use_vertices && reuse_solvers) { 8448 PetscInt n_vertices; 8449 8450 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8451 reuse_solvers = (PetscBool)!n_vertices; 8452 } 8453 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8454 if (!isseqaij) { 8455 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8456 if (matis->A == pcbddc->local_mat) { 8457 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8458 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8459 } else { 8460 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8461 } 8462 } 8463 if (!pcbddc->benign_change_explicit) { 8464 benign_n = pcbddc->benign_n; 8465 } else { 8466 benign_n = 0; 8467 } 8468 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8469 We need a global reduction to avoid possible deadlocks. 8470 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8471 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8472 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8473 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8474 need_change = (PetscBool)(!need_change); 8475 } 8476 /* If the user defines additional constraints, we import them here. 8477 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8478 if (need_change) { 8479 PC_IS *pcisf; 8480 PC_BDDC *pcbddcf; 8481 PC pcf; 8482 8483 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8484 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8485 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8486 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8487 8488 /* hacks */ 8489 pcisf = (PC_IS*)pcf->data; 8490 pcisf->is_B_local = pcis->is_B_local; 8491 pcisf->vec1_N = pcis->vec1_N; 8492 pcisf->BtoNmap = pcis->BtoNmap; 8493 pcisf->n = pcis->n; 8494 pcisf->n_B = pcis->n_B; 8495 pcbddcf = (PC_BDDC*)pcf->data; 8496 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8497 pcbddcf->mat_graph = pcbddc->mat_graph; 8498 pcbddcf->use_faces = PETSC_TRUE; 8499 pcbddcf->use_change_of_basis = PETSC_TRUE; 8500 pcbddcf->use_change_on_faces = PETSC_TRUE; 8501 pcbddcf->use_qr_single = PETSC_TRUE; 8502 pcbddcf->fake_change = PETSC_TRUE; 8503 8504 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8505 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8506 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8507 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8508 change = pcbddcf->ConstraintMatrix; 8509 pcbddcf->ConstraintMatrix = NULL; 8510 8511 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8512 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8513 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8514 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8515 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8516 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8517 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8518 pcf->ops->destroy = NULL; 8519 pcf->ops->reset = NULL; 8520 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8521 } 8522 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8523 8524 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8525 if (iP) { 8526 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8527 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8528 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8529 } 8530 if (discrete_harmonic) { 8531 Mat A; 8532 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8533 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8534 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8535 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8536 ierr = MatDestroy(&A);CHKERRQ(ierr); 8537 } else { 8538 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8539 } 8540 ierr = MatDestroy(&change);CHKERRQ(ierr); 8541 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8542 } 8543 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8544 8545 /* free adjacency */ 8546 if (free_used_adj) { 8547 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8548 } 8549 PetscFunctionReturn(0); 8550 } 8551 8552 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8553 { 8554 PC_IS *pcis=(PC_IS*)pc->data; 8555 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8556 PCBDDCGraph graph; 8557 PetscErrorCode ierr; 8558 8559 PetscFunctionBegin; 8560 /* attach interface graph for determining subsets */ 8561 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8562 IS verticesIS,verticescomm; 8563 PetscInt vsize,*idxs; 8564 8565 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8566 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8567 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8568 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8569 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8570 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8571 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8572 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8573 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8574 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8575 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8576 } else { 8577 graph = pcbddc->mat_graph; 8578 } 8579 /* print some info */ 8580 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8581 IS vertices; 8582 PetscInt nv,nedges,nfaces; 8583 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8584 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8585 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8586 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8587 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8588 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8590 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8591 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8592 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8593 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8594 } 8595 8596 /* sub_schurs init */ 8597 if (!pcbddc->sub_schurs) { 8598 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8599 } 8600 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8601 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8602 8603 /* free graph struct */ 8604 if (pcbddc->sub_schurs_rebuild) { 8605 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8606 } 8607 PetscFunctionReturn(0); 8608 } 8609 8610 PetscErrorCode PCBDDCCheckOperator(PC pc) 8611 { 8612 PC_IS *pcis=(PC_IS*)pc->data; 8613 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8614 PetscErrorCode ierr; 8615 8616 PetscFunctionBegin; 8617 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8618 IS zerodiag = NULL; 8619 Mat S_j,B0_B=NULL; 8620 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8621 PetscScalar *p0_check,*array,*array2; 8622 PetscReal norm; 8623 PetscInt i; 8624 8625 /* B0 and B0_B */ 8626 if (zerodiag) { 8627 IS dummy; 8628 8629 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8630 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8631 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8632 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8633 } 8634 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8635 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8636 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8637 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8638 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8639 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8640 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8641 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8642 /* S_j */ 8643 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8644 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8645 8646 /* mimic vector in \widetilde{W}_\Gamma */ 8647 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8648 /* continuous in primal space */ 8649 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8650 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8651 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8652 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8653 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8654 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8655 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8656 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8657 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8658 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8659 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8660 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8661 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8662 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8663 8664 /* assemble rhs for coarse problem */ 8665 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8666 /* local with Schur */ 8667 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8668 if (zerodiag) { 8669 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8670 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8671 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8672 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8673 } 8674 /* sum on primal nodes the local contributions */ 8675 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8676 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8677 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8678 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8679 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8680 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8681 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8682 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8683 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8684 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8685 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8686 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8687 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8688 /* scale primal nodes (BDDC sums contibutions) */ 8689 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8690 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8691 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8692 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8693 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8694 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8695 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8696 /* global: \widetilde{B0}_B w_\Gamma */ 8697 if (zerodiag) { 8698 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8699 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8700 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8701 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8702 } 8703 /* BDDC */ 8704 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8705 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8706 8707 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8708 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8709 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8710 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8711 for (i=0;i<pcbddc->benign_n;i++) { 8712 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8713 } 8714 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8715 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8716 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8717 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8718 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8719 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8720 } 8721 PetscFunctionReturn(0); 8722 } 8723 8724 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8725 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8726 { 8727 Mat At; 8728 IS rows; 8729 PetscInt rst,ren; 8730 PetscErrorCode ierr; 8731 PetscLayout rmap; 8732 8733 PetscFunctionBegin; 8734 rst = ren = 0; 8735 if (ccomm != MPI_COMM_NULL) { 8736 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8737 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8738 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8739 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8740 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8741 } 8742 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8743 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8744 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8745 8746 if (ccomm != MPI_COMM_NULL) { 8747 Mat_MPIAIJ *a,*b; 8748 IS from,to; 8749 Vec gvec; 8750 PetscInt lsize; 8751 8752 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8753 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8754 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8755 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8756 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8757 a = (Mat_MPIAIJ*)At->data; 8758 b = (Mat_MPIAIJ*)(*B)->data; 8759 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8760 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8761 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8762 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8763 b->A = a->A; 8764 b->B = a->B; 8765 8766 b->donotstash = a->donotstash; 8767 b->roworiented = a->roworiented; 8768 b->rowindices = 0; 8769 b->rowvalues = 0; 8770 b->getrowactive = PETSC_FALSE; 8771 8772 (*B)->rmap = rmap; 8773 (*B)->factortype = A->factortype; 8774 (*B)->assembled = PETSC_TRUE; 8775 (*B)->insertmode = NOT_SET_VALUES; 8776 (*B)->preallocated = PETSC_TRUE; 8777 8778 if (a->colmap) { 8779 #if defined(PETSC_USE_CTABLE) 8780 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8781 #else 8782 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8783 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8784 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8785 #endif 8786 } else b->colmap = 0; 8787 if (a->garray) { 8788 PetscInt len; 8789 len = a->B->cmap->n; 8790 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8791 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8792 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8793 } else b->garray = 0; 8794 8795 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8796 b->lvec = a->lvec; 8797 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8798 8799 /* cannot use VecScatterCopy */ 8800 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8801 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8802 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8803 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8804 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8805 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8806 ierr = ISDestroy(&from);CHKERRQ(ierr); 8807 ierr = ISDestroy(&to);CHKERRQ(ierr); 8808 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8809 } 8810 ierr = MatDestroy(&At);CHKERRQ(ierr); 8811 PetscFunctionReturn(0); 8812 } 8813