1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 IS *faces,*edges; 1518 PetscScalar *vals; 1519 const PetscScalar *array; 1520 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1526 if (graph->twodim) { 1527 lmaxneighs = 2; 1528 } else { 1529 lmaxneighs = 1; 1530 for (i=0;i<ne;i++) { 1531 const PetscInt *idxs; 1532 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1534 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1535 } 1536 lmaxneighs++; /* graph count does not include self */ 1537 } 1538 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1539 maxsize = 0; 1540 for (i=0;i<ne;i++) { 1541 PetscInt nn; 1542 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1543 maxsize = PetscMax(maxsize,nn); 1544 } 1545 for (i=0;i<nf;i++) { 1546 PetscInt nn; 1547 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1548 maxsize = PetscMax(maxsize,nn); 1549 } 1550 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1551 /* create vectors to hold quadrature weights */ 1552 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1557 } 1558 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1559 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1560 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1561 for (i=0;i<maxneighs;i++) { 1562 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1563 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1564 } 1565 1566 /* compute local quad vec */ 1567 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1568 if (!transpose) { 1569 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1572 } 1573 ierr = VecSet(p,1.);CHKERRQ(ierr); 1574 if (!transpose) { 1575 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } else { 1577 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1578 } 1579 if (vl2l) { 1580 Mat lA; 1581 VecScatter sc; 1582 1583 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1584 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1585 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1589 } else { 1590 vins = v; 1591 } 1592 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1593 ierr = VecDestroy(&p);CHKERRQ(ierr); 1594 1595 /* insert in global quadrature vecs */ 1596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1597 for (i=0;i<nf;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1608 } 1609 for (i=0;i<ne;i++) { 1610 const PetscInt *idxs; 1611 PetscInt idx,nn,j; 1612 1613 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1614 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1615 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1616 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1617 idx = -(idx+1); 1618 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1620 } 1621 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1622 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1623 if (vl2l) { 1624 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroy(&v);CHKERRQ(ierr); 1627 ierr = PetscFree(vals);CHKERRQ(ierr); 1628 1629 /* assemble near null space */ 1630 for (i=0;i<maxneighs;i++) { 1631 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1632 } 1633 for (i=0;i<maxneighs;i++) { 1634 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1635 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1636 } 1637 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1642 { 1643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1644 PetscErrorCode ierr; 1645 1646 PetscFunctionBegin; 1647 if (primalv) { 1648 if (pcbddc->user_primal_vertices_local) { 1649 IS list[2], newp; 1650 1651 list[0] = primalv; 1652 list[1] = pcbddc->user_primal_vertices_local; 1653 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1654 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1655 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1656 pcbddc->user_primal_vertices_local = newp; 1657 } else { 1658 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1659 } 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1665 { 1666 PetscErrorCode ierr; 1667 Vec local,global; 1668 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1669 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1670 PetscBool monolithic = PETSC_FALSE; 1671 1672 PetscFunctionBegin; 1673 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1674 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1676 /* need to convert from global to local topology information and remove references to information in global ordering */ 1677 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1678 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1679 if (monolithic) goto boundary; 1680 1681 if (pcbddc->user_provided_isfordofs) { 1682 if (pcbddc->n_ISForDofs) { 1683 PetscInt i; 1684 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1685 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1687 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1688 } 1689 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1690 pcbddc->n_ISForDofs = 0; 1691 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1692 } 1693 } else { 1694 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1695 DM dm; 1696 1697 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1698 if (!dm) { 1699 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1700 } 1701 if (dm) { 1702 IS *fields; 1703 PetscInt nf,i; 1704 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1705 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1706 for (i=0;i<nf;i++) { 1707 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1708 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1709 } 1710 ierr = PetscFree(fields);CHKERRQ(ierr); 1711 pcbddc->n_ISForDofsLocal = nf; 1712 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1713 PetscContainer c; 1714 1715 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1716 if (c) { 1717 MatISLocalFields lf; 1718 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1719 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1720 } else { /* fallback, create the default fields if bs > 1 */ 1721 PetscInt i, n = matis->A->rmap->n; 1722 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1723 if (i > 1) { 1724 pcbddc->n_ISForDofsLocal = i; 1725 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1726 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1727 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1728 } 1729 } 1730 } 1731 } 1732 } else { 1733 PetscInt i; 1734 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1736 } 1737 } 1738 } 1739 1740 boundary: 1741 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1742 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1743 } else if (pcbddc->DirichletBoundariesLocal) { 1744 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1745 } 1746 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1747 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1748 } else if (pcbddc->NeumannBoundariesLocal) { 1749 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1750 } 1751 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1752 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1753 } 1754 ierr = VecDestroy(&global);CHKERRQ(ierr); 1755 ierr = VecDestroy(&local);CHKERRQ(ierr); 1756 /* detect local disconnected subdomains if requested (use matis->A) */ 1757 if (pcbddc->detect_disconnected) { 1758 IS primalv = NULL; 1759 PetscInt i; 1760 1761 for (i=0;i<pcbddc->n_local_subs;i++) { 1762 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1763 } 1764 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1765 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1766 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1767 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1768 } 1769 /* early stage corner detection */ 1770 { 1771 DM dm; 1772 1773 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1774 if (dm) { 1775 PetscBool isda; 1776 1777 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1778 if (isda) { 1779 ISLocalToGlobalMapping l2l; 1780 IS corners; 1781 Mat lA; 1782 1783 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1785 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1786 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1787 if (l2l) { 1788 const PetscInt *idx; 1789 PetscInt bs,*idxout,n; 1790 1791 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1792 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1793 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1795 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1796 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1797 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1798 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1799 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1800 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1801 } else { /* not from DMDA */ 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 } 1804 } 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1811 { 1812 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1813 PetscErrorCode ierr; 1814 IS nis; 1815 const PetscInt *idxs; 1816 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1817 PetscBool *ld; 1818 1819 PetscFunctionBegin; 1820 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1821 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1822 if (mop == MPI_LAND) { 1823 /* init rootdata with true */ 1824 ld = (PetscBool*) matis->sf_rootdata; 1825 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1826 } else { 1827 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1828 } 1829 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1830 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1831 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1832 ld = (PetscBool*) matis->sf_leafdata; 1833 for (i=0;i<nd;i++) 1834 if (-1 < idxs[i] && idxs[i] < n) 1835 ld[idxs[i]] = PETSC_TRUE; 1836 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1837 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1838 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1839 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1840 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1841 if (mop == MPI_LAND) { 1842 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1843 } else { 1844 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1845 } 1846 for (i=0,nnd=0;i<n;i++) 1847 if (ld[i]) 1848 nidxs[nnd++] = i; 1849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1850 ierr = ISDestroy(is);CHKERRQ(ierr); 1851 *is = nis; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1856 { 1857 PC_IS *pcis = (PC_IS*)(pc->data); 1858 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 if (!pcbddc->benign_have_null) { 1863 PetscFunctionReturn(0); 1864 } 1865 if (pcbddc->ChangeOfBasisMatrix) { 1866 Vec swap; 1867 1868 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1869 swap = pcbddc->work_change; 1870 pcbddc->work_change = r; 1871 r = swap; 1872 } 1873 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1875 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1876 ierr = VecSet(z,0.);CHKERRQ(ierr); 1877 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1878 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 if (pcbddc->ChangeOfBasisMatrix) { 1880 pcbddc->work_change = r; 1881 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1882 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1888 { 1889 PCBDDCBenignMatMult_ctx ctx; 1890 PetscErrorCode ierr; 1891 PetscBool apply_right,apply_left,reset_x; 1892 1893 PetscFunctionBegin; 1894 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1895 if (transpose) { 1896 apply_right = ctx->apply_left; 1897 apply_left = ctx->apply_right; 1898 } else { 1899 apply_right = ctx->apply_right; 1900 apply_left = ctx->apply_left; 1901 } 1902 reset_x = PETSC_FALSE; 1903 if (apply_right) { 1904 const PetscScalar *ax; 1905 PetscInt nl,i; 1906 1907 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1908 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1910 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1911 for (i=0;i<ctx->benign_n;i++) { 1912 PetscScalar sum,val; 1913 const PetscInt *idxs; 1914 PetscInt nz,j; 1915 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1916 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1917 sum = 0.; 1918 if (ctx->apply_p0) { 1919 val = ctx->work[idxs[nz-1]]; 1920 for (j=0;j<nz-1;j++) { 1921 sum += ctx->work[idxs[j]]; 1922 ctx->work[idxs[j]] += val; 1923 } 1924 } else { 1925 for (j=0;j<nz-1;j++) { 1926 sum += ctx->work[idxs[j]]; 1927 } 1928 } 1929 ctx->work[idxs[nz-1]] -= sum; 1930 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1931 } 1932 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1933 reset_x = PETSC_TRUE; 1934 } 1935 if (transpose) { 1936 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1937 } else { 1938 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1939 } 1940 if (reset_x) { 1941 ierr = VecResetArray(x);CHKERRQ(ierr); 1942 } 1943 if (apply_left) { 1944 PetscScalar *ay; 1945 PetscInt i; 1946 1947 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1948 for (i=0;i<ctx->benign_n;i++) { 1949 PetscScalar sum,val; 1950 const PetscInt *idxs; 1951 PetscInt nz,j; 1952 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1953 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1954 val = -ay[idxs[nz-1]]; 1955 if (ctx->apply_p0) { 1956 sum = 0.; 1957 for (j=0;j<nz-1;j++) { 1958 sum += ay[idxs[j]]; 1959 ay[idxs[j]] += val; 1960 } 1961 ay[idxs[nz-1]] += sum; 1962 } else { 1963 for (j=0;j<nz-1;j++) { 1964 ay[idxs[j]] += val; 1965 } 1966 ay[idxs[nz-1]] = 0.; 1967 } 1968 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1969 } 1970 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1971 } 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1976 { 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1985 { 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1994 { 1995 PC_IS *pcis = (PC_IS*)pc->data; 1996 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1997 PCBDDCBenignMatMult_ctx ctx; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 if (!restore) { 2002 Mat A_IB,A_BI; 2003 PetscScalar *work; 2004 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2005 2006 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2007 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2008 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2009 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2010 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2011 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2012 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2013 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2014 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2015 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2016 ctx->apply_left = PETSC_TRUE; 2017 ctx->apply_right = PETSC_FALSE; 2018 ctx->apply_p0 = PETSC_FALSE; 2019 ctx->benign_n = pcbddc->benign_n; 2020 if (reuse) { 2021 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2022 ctx->free = PETSC_FALSE; 2023 } else { /* TODO: could be optimized for successive solves */ 2024 ISLocalToGlobalMapping N_to_D; 2025 PetscInt i; 2026 2027 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2028 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2029 for (i=0;i<pcbddc->benign_n;i++) { 2030 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2031 } 2032 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2033 ctx->free = PETSC_TRUE; 2034 } 2035 ctx->A = pcis->A_IB; 2036 ctx->work = work; 2037 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2038 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2040 pcis->A_IB = A_IB; 2041 2042 /* A_BI as A_IB^T */ 2043 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2044 pcbddc->benign_original_mat = pcis->A_BI; 2045 pcis->A_BI = A_BI; 2046 } else { 2047 if (!pcbddc->benign_original_mat) { 2048 PetscFunctionReturn(0); 2049 } 2050 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2052 pcis->A_IB = ctx->A; 2053 ctx->A = NULL; 2054 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2055 pcis->A_BI = pcbddc->benign_original_mat; 2056 pcbddc->benign_original_mat = NULL; 2057 if (ctx->free) { 2058 PetscInt i; 2059 for (i=0;i<ctx->benign_n;i++) { 2060 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2061 } 2062 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2063 } 2064 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2065 ierr = PetscFree(ctx);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 /* used just in bddc debug mode */ 2071 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2072 { 2073 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2074 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2075 Mat An; 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2080 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2081 if (is1) { 2082 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2083 ierr = MatDestroy(&An);CHKERRQ(ierr); 2084 } else { 2085 *B = An; 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 /* TODO: add reuse flag */ 2091 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2092 { 2093 Mat Bt; 2094 PetscScalar *a,*bdata; 2095 const PetscInt *ii,*ij; 2096 PetscInt m,n,i,nnz,*bii,*bij; 2097 PetscBool flg_row; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2102 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2103 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2104 nnz = n; 2105 for (i=0;i<ii[n];i++) { 2106 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2107 } 2108 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2109 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2110 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2111 nnz = 0; 2112 bii[0] = 0; 2113 for (i=0;i<n;i++) { 2114 PetscInt j; 2115 for (j=ii[i];j<ii[i+1];j++) { 2116 PetscScalar entry = a[j]; 2117 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2118 bij[nnz] = ij[j]; 2119 bdata[nnz] = entry; 2120 nnz++; 2121 } 2122 } 2123 bii[i+1] = nnz; 2124 } 2125 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2126 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2127 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2128 { 2129 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2130 b->free_a = PETSC_TRUE; 2131 b->free_ij = PETSC_TRUE; 2132 } 2133 *B = Bt; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2138 { 2139 Mat B = NULL; 2140 DM dm; 2141 IS is_dummy,*cc_n; 2142 ISLocalToGlobalMapping l2gmap_dummy; 2143 PCBDDCGraph graph; 2144 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2145 PetscInt i,n; 2146 PetscInt *xadj,*adjncy; 2147 PetscBool isplex = PETSC_FALSE; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 if (ncc) *ncc = 0; 2152 if (cc) *cc = NULL; 2153 if (primalv) *primalv = NULL; 2154 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2155 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2156 if (!dm) { 2157 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2158 } 2159 if (dm) { 2160 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2161 } 2162 if (isplex) { /* this code has been modified from plexpartition.c */ 2163 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2164 PetscInt *adj = NULL; 2165 IS cellNumbering; 2166 const PetscInt *cellNum; 2167 PetscBool useCone, useClosure; 2168 PetscSection section; 2169 PetscSegBuffer adjBuffer; 2170 PetscSF sfPoint; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2175 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2176 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2177 /* Build adjacency graph via a section/segbuffer */ 2178 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2179 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2180 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2181 /* Always use FVM adjacency to create partitioner graph */ 2182 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2183 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2184 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2185 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2186 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2187 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2188 for (n = 0, p = pStart; p < pEnd; p++) { 2189 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2190 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2191 adjSize = PETSC_DETERMINE; 2192 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2193 for (a = 0; a < adjSize; ++a) { 2194 const PetscInt point = adj[a]; 2195 if (pStart <= point && point < pEnd) { 2196 PetscInt *PETSC_RESTRICT pBuf; 2197 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2198 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2199 *pBuf = point; 2200 } 2201 } 2202 n++; 2203 } 2204 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2205 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2206 /* Derive CSR graph from section/segbuffer */ 2207 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2208 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2209 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2210 for (idx = 0, p = pStart; p < pEnd; p++) { 2211 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2212 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2213 } 2214 xadj[n] = size; 2215 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2216 /* Clean up */ 2217 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2218 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2219 ierr = PetscFree(adj);CHKERRQ(ierr); 2220 graph->xadj = xadj; 2221 graph->adjncy = adjncy; 2222 } else { 2223 Mat A; 2224 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2225 2226 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2227 if (!A->rmap->N || !A->cmap->N) { 2228 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2232 if (!isseqaij && filter) { 2233 PetscBool isseqdense; 2234 2235 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2236 if (!isseqdense) { 2237 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2238 } else { /* TODO: rectangular case and LDA */ 2239 PetscScalar *array; 2240 PetscReal chop=1.e-6; 2241 2242 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2243 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2244 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2245 for (i=0;i<n;i++) { 2246 PetscInt j; 2247 for (j=i+1;j<n;j++) { 2248 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2249 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2250 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2251 } 2252 } 2253 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2254 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2255 } 2256 } else { 2257 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2258 B = A; 2259 } 2260 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2261 2262 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2263 if (filter) { 2264 PetscScalar *data; 2265 PetscInt j,cum; 2266 2267 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2268 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2269 cum = 0; 2270 for (i=0;i<n;i++) { 2271 PetscInt t; 2272 2273 for (j=xadj[i];j<xadj[i+1];j++) { 2274 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2275 continue; 2276 } 2277 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2278 } 2279 t = xadj_filtered[i]; 2280 xadj_filtered[i] = cum; 2281 cum += t; 2282 } 2283 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2284 graph->xadj = xadj_filtered; 2285 graph->adjncy = adjncy_filtered; 2286 } else { 2287 graph->xadj = xadj; 2288 graph->adjncy = adjncy; 2289 } 2290 } 2291 /* compute local connected components using PCBDDCGraph */ 2292 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2293 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2294 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2295 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2296 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2297 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2298 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2299 2300 /* partial clean up */ 2301 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2302 if (B) { 2303 PetscBool flg_row; 2304 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2305 ierr = MatDestroy(&B);CHKERRQ(ierr); 2306 } 2307 if (isplex) { 2308 ierr = PetscFree(xadj);CHKERRQ(ierr); 2309 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2310 } 2311 2312 /* get back data */ 2313 if (isplex) { 2314 if (ncc) *ncc = graph->ncc; 2315 if (cc || primalv) { 2316 Mat A; 2317 PetscBT btv,btvt; 2318 PetscSection subSection; 2319 PetscInt *ids,cum,cump,*cids,*pids; 2320 2321 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2322 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2323 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2324 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2325 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2326 2327 cids[0] = 0; 2328 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2329 PetscInt j; 2330 2331 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2332 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2333 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2334 2335 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2336 for (k = 0; k < 2*size; k += 2) { 2337 PetscInt s, p = closure[k], off, dof, cdof; 2338 2339 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2340 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2341 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2342 for (s = 0; s < dof-cdof; s++) { 2343 if (PetscBTLookupSet(btvt,off+s)) continue; 2344 if (!PetscBTLookup(btv,off+s)) { 2345 ids[cum++] = off+s; 2346 } else { /* cross-vertex */ 2347 pids[cump++] = off+s; 2348 } 2349 } 2350 } 2351 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2352 } 2353 cids[i+1] = cum; 2354 /* mark dofs as already assigned */ 2355 for (j = cids[i]; j < cids[i+1]; j++) { 2356 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2357 } 2358 } 2359 if (cc) { 2360 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2361 for (i = 0; i < graph->ncc; i++) { 2362 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2363 } 2364 *cc = cc_n; 2365 } 2366 if (primalv) { 2367 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2368 } 2369 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2370 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2371 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2372 } 2373 } else { 2374 if (ncc) *ncc = graph->ncc; 2375 if (cc) { 2376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2377 for (i=0;i<graph->ncc;i++) { 2378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2379 } 2380 *cc = cc_n; 2381 } 2382 } 2383 /* clean up graph */ 2384 graph->xadj = 0; 2385 graph->adjncy = 0; 2386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2391 { 2392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2393 PC_IS* pcis = (PC_IS*)(pc->data); 2394 IS dirIS = NULL; 2395 PetscInt i; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2400 if (zerodiag) { 2401 Mat A; 2402 Vec vec3_N; 2403 PetscScalar *vals; 2404 const PetscInt *idxs; 2405 PetscInt nz,*count; 2406 2407 /* p0 */ 2408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2412 for (i=0;i<nz;i++) vals[i] = 1.; 2413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2416 /* v_I */ 2417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2418 for (i=0;i<nz;i++) vals[i] = 0.; 2419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2425 if (dirIS) { 2426 PetscInt n; 2427 2428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2430 for (i=0;i<n;i++) vals[i] = 0.; 2431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2433 } 2434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2438 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2441 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2442 ierr = PetscFree(vals);CHKERRQ(ierr); 2443 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2444 2445 /* there should not be any pressure dofs lying on the interface */ 2446 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2447 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2449 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2450 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2451 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2452 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2453 ierr = PetscFree(count);CHKERRQ(ierr); 2454 } 2455 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2456 2457 /* check PCBDDCBenignGetOrSetP0 */ 2458 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2460 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2462 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2463 for (i=0;i<pcbddc->benign_n;i++) { 2464 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2465 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2471 { 2472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2473 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2474 PetscInt nz,n; 2475 PetscInt *interior_dofs,n_interior_dofs,nneu; 2476 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2481 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2482 for (n=0;n<pcbddc->benign_n;n++) { 2483 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2484 } 2485 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2486 pcbddc->benign_n = 0; 2487 2488 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2489 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2490 Checks if all the pressure dofs in each subdomain have a zero diagonal 2491 If not, a change of basis on pressures is not needed 2492 since the local Schur complements are already SPD 2493 */ 2494 has_null_pressures = PETSC_TRUE; 2495 have_null = PETSC_TRUE; 2496 if (pcbddc->n_ISForDofsLocal) { 2497 IS iP = NULL; 2498 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2499 2500 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2501 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2502 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2503 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2504 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2505 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2506 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2507 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2508 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2509 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2510 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2511 if (iP) { 2512 IS newpressures; 2513 2514 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2515 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2516 pressures = newpressures; 2517 } 2518 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2519 if (!sorted) { 2520 ierr = ISSort(pressures);CHKERRQ(ierr); 2521 } 2522 } else { 2523 pressures = NULL; 2524 } 2525 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2526 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2527 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2528 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2529 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2530 if (!sorted) { 2531 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2532 } 2533 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2534 zerodiag_save = zerodiag; 2535 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2536 if (!nz) { 2537 if (n) have_null = PETSC_FALSE; 2538 has_null_pressures = PETSC_FALSE; 2539 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2540 } 2541 recompute_zerodiag = PETSC_FALSE; 2542 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2543 zerodiag_subs = NULL; 2544 pcbddc->benign_n = 0; 2545 n_interior_dofs = 0; 2546 interior_dofs = NULL; 2547 nneu = 0; 2548 if (pcbddc->NeumannBoundariesLocal) { 2549 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2550 } 2551 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2552 if (checkb) { /* need to compute interior nodes */ 2553 PetscInt n,i,j; 2554 PetscInt n_neigh,*neigh,*n_shared,**shared; 2555 PetscInt *iwork; 2556 2557 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2558 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2559 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2561 for (i=1;i<n_neigh;i++) 2562 for (j=0;j<n_shared[i];j++) 2563 iwork[shared[i][j]] += 1; 2564 for (i=0;i<n;i++) 2565 if (!iwork[i]) 2566 interior_dofs[n_interior_dofs++] = i; 2567 ierr = PetscFree(iwork);CHKERRQ(ierr); 2568 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2569 } 2570 if (has_null_pressures) { 2571 IS *subs; 2572 PetscInt nsubs,i,j,nl; 2573 const PetscInt *idxs; 2574 PetscScalar *array; 2575 Vec *work; 2576 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2577 2578 subs = pcbddc->local_subs; 2579 nsubs = pcbddc->n_local_subs; 2580 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2581 if (checkb) { 2582 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2583 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2584 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2585 /* work[0] = 1_p */ 2586 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2587 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2588 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2589 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2590 /* work[0] = 1_v */ 2591 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2592 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2593 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2594 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2595 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2596 } 2597 if (nsubs > 1) { 2598 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2599 for (i=0;i<nsubs;i++) { 2600 ISLocalToGlobalMapping l2g; 2601 IS t_zerodiag_subs; 2602 PetscInt nl; 2603 2604 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2605 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2606 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2607 if (nl) { 2608 PetscBool valid = PETSC_TRUE; 2609 2610 if (checkb) { 2611 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2612 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2613 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2614 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2615 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2616 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2617 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2618 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2619 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2620 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2621 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2622 for (j=0;j<n_interior_dofs;j++) { 2623 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2624 valid = PETSC_FALSE; 2625 break; 2626 } 2627 } 2628 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2629 } 2630 if (valid && nneu) { 2631 const PetscInt *idxs; 2632 PetscInt nzb; 2633 2634 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2635 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2636 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2637 if (nzb) valid = PETSC_FALSE; 2638 } 2639 if (valid && pressures) { 2640 IS t_pressure_subs; 2641 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2642 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2643 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2644 } 2645 if (valid) { 2646 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2647 pcbddc->benign_n++; 2648 } else { 2649 recompute_zerodiag = PETSC_TRUE; 2650 } 2651 } 2652 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2653 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2654 } 2655 } else { /* there's just one subdomain (or zero if they have not been detected */ 2656 PetscBool valid = PETSC_TRUE; 2657 2658 if (nneu) valid = PETSC_FALSE; 2659 if (valid && pressures) { 2660 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2661 } 2662 if (valid && checkb) { 2663 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2664 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2665 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2666 for (j=0;j<n_interior_dofs;j++) { 2667 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2668 valid = PETSC_FALSE; 2669 break; 2670 } 2671 } 2672 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2673 } 2674 if (valid) { 2675 pcbddc->benign_n = 1; 2676 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2677 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2678 zerodiag_subs[0] = zerodiag; 2679 } 2680 } 2681 if (checkb) { 2682 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2683 } 2684 } 2685 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2686 2687 if (!pcbddc->benign_n) { 2688 PetscInt n; 2689 2690 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2691 recompute_zerodiag = PETSC_FALSE; 2692 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2693 if (n) { 2694 has_null_pressures = PETSC_FALSE; 2695 have_null = PETSC_FALSE; 2696 } 2697 } 2698 2699 /* final check for null pressures */ 2700 if (zerodiag && pressures) { 2701 PetscInt nz,np; 2702 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2703 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2704 if (nz != np) have_null = PETSC_FALSE; 2705 } 2706 2707 if (recompute_zerodiag) { 2708 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2709 if (pcbddc->benign_n == 1) { 2710 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2711 zerodiag = zerodiag_subs[0]; 2712 } else { 2713 PetscInt i,nzn,*new_idxs; 2714 2715 nzn = 0; 2716 for (i=0;i<pcbddc->benign_n;i++) { 2717 PetscInt ns; 2718 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2719 nzn += ns; 2720 } 2721 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2722 nzn = 0; 2723 for (i=0;i<pcbddc->benign_n;i++) { 2724 PetscInt ns,*idxs; 2725 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2726 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2727 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2728 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2729 nzn += ns; 2730 } 2731 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2732 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2733 } 2734 have_null = PETSC_FALSE; 2735 } 2736 2737 /* Prepare matrix to compute no-net-flux */ 2738 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2739 Mat A,loc_divudotp; 2740 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2741 IS row,col,isused = NULL; 2742 PetscInt M,N,n,st,n_isused; 2743 2744 if (pressures) { 2745 isused = pressures; 2746 } else { 2747 isused = zerodiag_save; 2748 } 2749 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2750 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2751 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2752 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2753 n_isused = 0; 2754 if (isused) { 2755 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2756 } 2757 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2758 st = st-n_isused; 2759 if (n) { 2760 const PetscInt *gidxs; 2761 2762 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2764 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2765 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2766 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2767 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2768 } else { 2769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2770 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2772 } 2773 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2774 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2776 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2777 ierr = ISDestroy(&row);CHKERRQ(ierr); 2778 ierr = ISDestroy(&col);CHKERRQ(ierr); 2779 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2780 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2781 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2782 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2783 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2784 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2785 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2786 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2787 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 } 2790 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2791 2792 /* change of basis and p0 dofs */ 2793 if (has_null_pressures) { 2794 IS zerodiagc; 2795 const PetscInt *idxs,*idxsc; 2796 PetscInt i,s,*nnz; 2797 2798 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2799 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2800 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2801 /* local change of basis for pressures */ 2802 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2803 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2804 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2805 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2806 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2807 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2808 for (i=0;i<pcbddc->benign_n;i++) { 2809 PetscInt nzs,j; 2810 2811 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2812 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2813 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2814 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2815 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2816 } 2817 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2818 ierr = PetscFree(nnz);CHKERRQ(ierr); 2819 /* set identity on velocities */ 2820 for (i=0;i<n-nz;i++) { 2821 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2822 } 2823 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2824 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2825 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2826 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2827 /* set change on pressures */ 2828 for (s=0;s<pcbddc->benign_n;s++) { 2829 PetscScalar *array; 2830 PetscInt nzs; 2831 2832 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2833 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2834 for (i=0;i<nzs-1;i++) { 2835 PetscScalar vals[2]; 2836 PetscInt cols[2]; 2837 2838 cols[0] = idxs[i]; 2839 cols[1] = idxs[nzs-1]; 2840 vals[0] = 1.; 2841 vals[1] = 1.; 2842 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2845 for (i=0;i<nzs-1;i++) array[i] = -1.; 2846 array[nzs-1] = 1.; 2847 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2848 /* store local idxs for p0 */ 2849 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2850 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2851 ierr = PetscFree(array);CHKERRQ(ierr); 2852 } 2853 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 /* project if needed */ 2856 if (pcbddc->benign_change_explicit) { 2857 Mat M; 2858 2859 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2860 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2862 ierr = MatDestroy(&M);CHKERRQ(ierr); 2863 } 2864 /* store global idxs for p0 */ 2865 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2866 } 2867 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2868 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2869 2870 /* determines if the coarse solver will be singular or not */ 2871 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2872 /* determines if the problem has subdomains with 0 pressure block */ 2873 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 *zerodiaglocal = zerodiag; 2875 PetscFunctionReturn(0); 2876 } 2877 2878 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2879 { 2880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2881 PetscScalar *array; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 if (!pcbddc->benign_sf) { 2886 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2887 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2888 } 2889 if (get) { 2890 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2891 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2893 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2894 } else { 2895 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2896 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2904 { 2905 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 /* TODO: add error checking 2910 - avoid nested pop (or push) calls. 2911 - cannot push before pop. 2912 - cannot call this if pcbddc->local_mat is NULL 2913 */ 2914 if (!pcbddc->benign_n) { 2915 PetscFunctionReturn(0); 2916 } 2917 if (pop) { 2918 if (pcbddc->benign_change_explicit) { 2919 IS is_p0; 2920 MatReuse reuse; 2921 2922 /* extract B_0 */ 2923 reuse = MAT_INITIAL_MATRIX; 2924 if (pcbddc->benign_B0) { 2925 reuse = MAT_REUSE_MATRIX; 2926 } 2927 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2928 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2929 /* remove rows and cols from local problem */ 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2931 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2932 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2934 } else { 2935 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2936 PetscScalar *vals; 2937 PetscInt i,n,*idxs_ins; 2938 2939 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2940 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2941 if (!pcbddc->benign_B0) { 2942 PetscInt *nnz; 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2944 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->benign_n;i++) { 2948 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2949 nnz[i] = n - nnz[i]; 2950 } 2951 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2952 ierr = PetscFree(nnz);CHKERRQ(ierr); 2953 } 2954 2955 for (i=0;i<pcbddc->benign_n;i++) { 2956 PetscScalar *array; 2957 PetscInt *idxs,j,nz,cum; 2958 2959 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2960 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2961 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2962 for (j=0;j<nz;j++) vals[j] = 1.; 2963 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2964 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2965 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2966 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2967 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2968 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2969 cum = 0; 2970 for (j=0;j<n;j++) { 2971 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2972 vals[cum] = array[j]; 2973 idxs_ins[cum] = j; 2974 cum++; 2975 } 2976 } 2977 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2978 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2979 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2980 } 2981 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2984 } 2985 } else { /* push */ 2986 if (pcbddc->benign_change_explicit) { 2987 PetscInt i; 2988 2989 for (i=0;i<pcbddc->benign_n;i++) { 2990 PetscScalar *B0_vals; 2991 PetscInt *B0_cols,B0_ncol; 2992 2993 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2994 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2997 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2998 } 2999 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 } else { 3002 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3003 } 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3009 { 3010 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3011 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3012 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3013 PetscBLASInt *B_iwork,*B_ifail; 3014 PetscScalar *work,lwork; 3015 PetscScalar *St,*S,*eigv; 3016 PetscScalar *Sarray,*Starray; 3017 PetscReal *eigs,thresh; 3018 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3019 PetscBool allocated_S_St; 3020 #if defined(PETSC_USE_COMPLEX) 3021 PetscReal *rwork; 3022 #endif 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3027 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3028 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3029 3030 if (pcbddc->dbg_flag) { 3031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3034 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 } 3036 3037 if (pcbddc->dbg_flag) { 3038 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3039 } 3040 3041 /* max size of subsets */ 3042 mss = 0; 3043 for (i=0;i<sub_schurs->n_subs;i++) { 3044 PetscInt subset_size; 3045 3046 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3047 mss = PetscMax(mss,subset_size); 3048 } 3049 3050 /* min/max and threshold */ 3051 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3052 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3053 nmax = PetscMax(nmin,nmax); 3054 allocated_S_St = PETSC_FALSE; 3055 if (nmin) { 3056 allocated_S_St = PETSC_TRUE; 3057 } 3058 3059 /* allocate lapack workspace */ 3060 cum = cum2 = 0; 3061 maxneigs = 0; 3062 for (i=0;i<sub_schurs->n_subs;i++) { 3063 PetscInt n,subset_size; 3064 3065 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3066 n = PetscMin(subset_size,nmax); 3067 cum += subset_size; 3068 cum2 += subset_size*n; 3069 maxneigs = PetscMax(maxneigs,n); 3070 } 3071 if (mss) { 3072 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3073 PetscBLASInt B_itype = 1; 3074 PetscBLASInt B_N = mss; 3075 PetscReal zero = 0.0; 3076 PetscReal eps = 0.0; /* dlamch? */ 3077 3078 B_lwork = -1; 3079 S = NULL; 3080 St = NULL; 3081 eigs = NULL; 3082 eigv = NULL; 3083 B_iwork = NULL; 3084 B_ifail = NULL; 3085 #if defined(PETSC_USE_COMPLEX) 3086 rwork = NULL; 3087 #endif 3088 thresh = 1.0; 3089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3092 #else 3093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3094 #endif 3095 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3097 } else { 3098 /* TODO */ 3099 } 3100 } else { 3101 lwork = 0; 3102 } 3103 3104 nv = 0; 3105 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3106 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3107 } 3108 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3109 if (allocated_S_St) { 3110 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3115 #endif 3116 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3117 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3118 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3119 nv+cum,&pcbddc->adaptive_constraints_idxs, 3120 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3122 3123 maxneigs = 0; 3124 cum = cumarray = 0; 3125 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3126 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3127 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3128 const PetscInt *idxs; 3129 3130 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3131 for (cum=0;cum<nv;cum++) { 3132 pcbddc->adaptive_constraints_n[cum] = 1; 3133 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3134 pcbddc->adaptive_constraints_data[cum] = 1.0; 3135 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3136 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3137 } 3138 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3139 } 3140 3141 if (mss) { /* multilevel */ 3142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3143 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3144 } 3145 3146 thresh = pcbddc->adaptive_threshold; 3147 for (i=0;i<sub_schurs->n_subs;i++) { 3148 const PetscInt *idxs; 3149 PetscReal upper,lower; 3150 PetscInt j,subset_size,eigs_start = 0; 3151 PetscBLASInt B_N; 3152 PetscBool same_data = PETSC_FALSE; 3153 3154 if (pcbddc->use_deluxe_scaling) { 3155 upper = PETSC_MAX_REAL; 3156 lower = thresh; 3157 } else { 3158 upper = 1./thresh; 3159 lower = 0.; 3160 } 3161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3162 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3165 if (sub_schurs->is_hermitian) { 3166 PetscInt j,k; 3167 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3168 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3169 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3170 } 3171 for (j=0;j<subset_size;j++) { 3172 for (k=j;k<subset_size;k++) { 3173 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3174 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3175 } 3176 } 3177 } else { 3178 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3179 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3180 } 3181 } else { 3182 S = Sarray + cumarray; 3183 St = Starray + cumarray; 3184 } 3185 /* see if we can save some work */ 3186 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3187 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3188 } 3189 3190 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3191 B_neigs = 0; 3192 } else { 3193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3194 PetscBLASInt B_itype = 1; 3195 PetscBLASInt B_IL, B_IU; 3196 PetscReal eps = -1.0; /* dlamch? */ 3197 PetscInt nmin_s; 3198 PetscBool compute_range = PETSC_FALSE; 3199 3200 if (pcbddc->dbg_flag) { 3201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3202 } 3203 3204 compute_range = PETSC_FALSE; 3205 if (thresh > 1.+PETSC_SMALL && !same_data) { 3206 compute_range = PETSC_TRUE; 3207 } 3208 3209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3210 if (compute_range) { 3211 3212 /* ask for eigenvalues larger than thresh */ 3213 #if defined(PETSC_USE_COMPLEX) 3214 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3215 #else 3216 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3217 #endif 3218 } else if (!same_data) { 3219 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3220 B_IL = 1; 3221 #if defined(PETSC_USE_COMPLEX) 3222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3223 #else 3224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3225 #endif 3226 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3227 PetscInt k; 3228 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3229 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3230 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3231 nmin = nmax; 3232 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3233 for (k=0;k<nmax;k++) { 3234 eigs[k] = 1./PETSC_SMALL; 3235 eigv[k*(subset_size+1)] = 1.0; 3236 } 3237 } 3238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3239 if (B_ierr) { 3240 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3241 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3242 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3243 } 3244 3245 if (B_neigs > nmax) { 3246 if (pcbddc->dbg_flag) { 3247 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3248 } 3249 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3250 B_neigs = nmax; 3251 } 3252 3253 nmin_s = PetscMin(nmin,B_N); 3254 if (B_neigs < nmin_s) { 3255 PetscBLASInt B_neigs2; 3256 3257 if (pcbddc->use_deluxe_scaling) { 3258 B_IL = B_N - nmin_s + 1; 3259 B_IU = B_N - B_neigs; 3260 } else { 3261 B_IL = B_neigs + 1; 3262 B_IU = nmin_s; 3263 } 3264 if (pcbddc->dbg_flag) { 3265 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3266 } 3267 if (sub_schurs->is_hermitian) { 3268 PetscInt j,k; 3269 for (j=0;j<subset_size;j++) { 3270 for (k=j;k<subset_size;k++) { 3271 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3272 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3273 } 3274 } 3275 } else { 3276 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3277 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3278 } 3279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3280 #if defined(PETSC_USE_COMPLEX) 3281 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3282 #else 3283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3284 #endif 3285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3286 B_neigs += B_neigs2; 3287 } 3288 if (B_ierr) { 3289 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3290 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3291 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3292 } 3293 if (pcbddc->dbg_flag) { 3294 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3295 for (j=0;j<B_neigs;j++) { 3296 if (eigs[j] == 0.0) { 3297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3298 } else { 3299 if (pcbddc->use_deluxe_scaling) { 3300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3301 } else { 3302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3303 } 3304 } 3305 } 3306 } 3307 } else { 3308 /* TODO */ 3309 } 3310 } 3311 /* change the basis back to the original one */ 3312 if (sub_schurs->change) { 3313 Mat change,phi,phit; 3314 3315 if (pcbddc->dbg_flag > 2) { 3316 PetscInt ii; 3317 for (ii=0;ii<B_neigs;ii++) { 3318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3319 for (j=0;j<B_N;j++) { 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3322 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3324 #else 3325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3326 #endif 3327 } 3328 } 3329 } 3330 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3332 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3333 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3334 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3335 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3336 } 3337 maxneigs = PetscMax(B_neigs,maxneigs); 3338 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3339 if (B_neigs) { 3340 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3341 3342 if (pcbddc->dbg_flag > 1) { 3343 PetscInt ii; 3344 for (ii=0;ii<B_neigs;ii++) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3346 for (j=0;j<B_N;j++) { 3347 #if defined(PETSC_USE_COMPLEX) 3348 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3349 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3351 #else 3352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3353 #endif 3354 } 3355 } 3356 } 3357 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3358 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3359 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3360 cum++; 3361 } 3362 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3363 /* shift for next computation */ 3364 cumarray += subset_size*subset_size; 3365 } 3366 if (pcbddc->dbg_flag) { 3367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3368 } 3369 3370 if (mss) { 3371 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3372 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3373 /* destroy matrices (junk) */ 3374 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3375 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3376 } 3377 if (allocated_S_St) { 3378 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3379 } 3380 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3381 #if defined(PETSC_USE_COMPLEX) 3382 ierr = PetscFree(rwork);CHKERRQ(ierr); 3383 #endif 3384 if (pcbddc->dbg_flag) { 3385 PetscInt maxneigs_r; 3386 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3387 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3388 } 3389 PetscFunctionReturn(0); 3390 } 3391 3392 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3393 { 3394 PetscScalar *coarse_submat_vals; 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3399 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3400 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3401 3402 /* Setup local neumann solver ksp_R */ 3403 /* PCBDDCSetUpLocalScatters should be called first! */ 3404 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3405 3406 /* 3407 Setup local correction and local part of coarse basis. 3408 Gives back the dense local part of the coarse matrix in column major ordering 3409 */ 3410 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3411 3412 /* Compute total number of coarse nodes and setup coarse solver */ 3413 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3414 3415 /* free */ 3416 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3417 PetscFunctionReturn(0); 3418 } 3419 3420 PetscErrorCode PCBDDCResetCustomization(PC pc) 3421 { 3422 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3423 PetscErrorCode ierr; 3424 3425 PetscFunctionBegin; 3426 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3427 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3428 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3429 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3430 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3431 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3432 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3433 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3434 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3435 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 PetscErrorCode PCBDDCResetTopography(PC pc) 3440 { 3441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3442 PetscInt i; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3447 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3448 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3450 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3451 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3452 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3454 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3455 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3456 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3457 for (i=0;i<pcbddc->n_local_subs;i++) { 3458 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3459 } 3460 pcbddc->n_local_subs = 0; 3461 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3462 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3463 pcbddc->graphanalyzed = PETSC_FALSE; 3464 pcbddc->recompute_topography = PETSC_TRUE; 3465 PetscFunctionReturn(0); 3466 } 3467 3468 PetscErrorCode PCBDDCResetSolvers(PC pc) 3469 { 3470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3471 PetscErrorCode ierr; 3472 3473 PetscFunctionBegin; 3474 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3475 if (pcbddc->coarse_phi_B) { 3476 PetscScalar *array; 3477 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3478 ierr = PetscFree(array);CHKERRQ(ierr); 3479 } 3480 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3481 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3482 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3483 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3484 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3485 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3487 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3488 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3489 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3490 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3491 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3492 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3493 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3494 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3495 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3496 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3497 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3498 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3499 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3500 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3501 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3502 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3503 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3504 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3505 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3506 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3507 if (pcbddc->benign_zerodiag_subs) { 3508 PetscInt i; 3509 for (i=0;i<pcbddc->benign_n;i++) { 3510 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3515 PetscFunctionReturn(0); 3516 } 3517 3518 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3519 { 3520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3521 PC_IS *pcis = (PC_IS*)pc->data; 3522 VecType impVecType; 3523 PetscInt n_constraints,n_R,old_size; 3524 PetscErrorCode ierr; 3525 3526 PetscFunctionBegin; 3527 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3528 n_R = pcis->n - pcbddc->n_vertices; 3529 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3530 /* local work vectors (try to avoid unneeded work)*/ 3531 /* R nodes */ 3532 old_size = -1; 3533 if (pcbddc->vec1_R) { 3534 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3535 } 3536 if (n_R != old_size) { 3537 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3538 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3539 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3540 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3541 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3542 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3543 } 3544 /* local primal dofs */ 3545 old_size = -1; 3546 if (pcbddc->vec1_P) { 3547 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3548 } 3549 if (pcbddc->local_primal_size != old_size) { 3550 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3551 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3552 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3553 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3554 } 3555 /* local explicit constraints */ 3556 old_size = -1; 3557 if (pcbddc->vec1_C) { 3558 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3559 } 3560 if (n_constraints && n_constraints != old_size) { 3561 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3562 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3563 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3564 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3570 { 3571 PetscErrorCode ierr; 3572 /* pointers to pcis and pcbddc */ 3573 PC_IS* pcis = (PC_IS*)pc->data; 3574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3576 /* submatrices of local problem */ 3577 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3578 /* submatrices of local coarse problem */ 3579 Mat S_VV,S_CV,S_VC,S_CC; 3580 /* working matrices */ 3581 Mat C_CR; 3582 /* additional working stuff */ 3583 PC pc_R; 3584 Mat F,Brhs = NULL; 3585 Vec dummy_vec; 3586 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3587 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3588 PetscScalar *work; 3589 PetscInt *idx_V_B; 3590 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3591 PetscInt i,n_R,n_D,n_B; 3592 3593 /* some shortcuts to scalars */ 3594 PetscScalar one=1.0,m_one=-1.0; 3595 3596 PetscFunctionBegin; 3597 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3598 3599 /* Set Non-overlapping dimensions */ 3600 n_vertices = pcbddc->n_vertices; 3601 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3602 n_B = pcis->n_B; 3603 n_D = pcis->n - n_B; 3604 n_R = pcis->n - n_vertices; 3605 3606 /* vertices in boundary numbering */ 3607 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3608 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3609 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3610 3611 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3612 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3614 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3616 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3618 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3620 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3621 3622 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3623 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3624 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3625 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3626 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3627 lda_rhs = n_R; 3628 need_benign_correction = PETSC_FALSE; 3629 if (isLU || isILU || isCHOL) { 3630 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3631 } else if (sub_schurs && sub_schurs->reuse_solver) { 3632 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3633 MatFactorType type; 3634 3635 F = reuse_solver->F; 3636 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3637 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3638 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3639 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3640 } else { 3641 F = NULL; 3642 } 3643 3644 /* determine if we can use a sparse right-hand side */ 3645 sparserhs = PETSC_FALSE; 3646 if (F) { 3647 MatSolverType solver; 3648 3649 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3650 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3651 } 3652 3653 /* allocate workspace */ 3654 n = 0; 3655 if (n_constraints) { 3656 n += lda_rhs*n_constraints; 3657 } 3658 if (n_vertices) { 3659 n = PetscMax(2*lda_rhs*n_vertices,n); 3660 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3661 } 3662 if (!pcbddc->symmetric_primal) { 3663 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3664 } 3665 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3666 3667 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3668 dummy_vec = NULL; 3669 if (need_benign_correction && lda_rhs != n_R && F) { 3670 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3671 } 3672 3673 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3674 if (n_constraints) { 3675 Mat M3,C_B; 3676 IS is_aux; 3677 PetscScalar *array,*array2; 3678 3679 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3680 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3681 3682 /* Extract constraints on R nodes: C_{CR} */ 3683 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3684 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3685 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3686 3687 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3688 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3689 if (!sparserhs) { 3690 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3691 for (i=0;i<n_constraints;i++) { 3692 const PetscScalar *row_cmat_values; 3693 const PetscInt *row_cmat_indices; 3694 PetscInt size_of_constraint,j; 3695 3696 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3697 for (j=0;j<size_of_constraint;j++) { 3698 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3699 } 3700 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3701 } 3702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3703 } else { 3704 Mat tC_CR; 3705 3706 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3707 if (lda_rhs != n_R) { 3708 PetscScalar *aa; 3709 PetscInt r,*ii,*jj; 3710 PetscBool done; 3711 3712 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3713 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3714 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3715 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3716 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3717 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3718 } else { 3719 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3720 tC_CR = C_CR; 3721 } 3722 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3723 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3724 } 3725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3726 if (F) { 3727 if (need_benign_correction) { 3728 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3729 3730 /* rhs is already zero on interior dofs, no need to change the rhs */ 3731 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3732 } 3733 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3734 if (need_benign_correction) { 3735 PetscScalar *marr; 3736 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3737 3738 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3739 if (lda_rhs != n_R) { 3740 for (i=0;i<n_constraints;i++) { 3741 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3742 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3743 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3744 } 3745 } else { 3746 for (i=0;i<n_constraints;i++) { 3747 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3748 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3749 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3750 } 3751 } 3752 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3753 } 3754 } else { 3755 PetscScalar *marr; 3756 3757 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3758 for (i=0;i<n_constraints;i++) { 3759 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3760 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3761 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3763 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3764 } 3765 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3766 } 3767 if (sparserhs) { 3768 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3769 } 3770 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3771 if (!pcbddc->switch_static) { 3772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3774 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3775 for (i=0;i<n_constraints;i++) { 3776 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3777 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3781 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3782 } 3783 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3784 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3785 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3786 } else { 3787 if (lda_rhs != n_R) { 3788 IS dummy; 3789 3790 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3791 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3792 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3793 } else { 3794 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3795 pcbddc->local_auxmat2 = local_auxmat2_R; 3796 } 3797 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3798 } 3799 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3800 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3801 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3802 if (isCHOL) { 3803 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3804 } else { 3805 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3806 } 3807 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3808 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3809 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3810 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3811 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3812 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3813 } 3814 3815 /* Get submatrices from subdomain matrix */ 3816 if (n_vertices) { 3817 IS is_aux; 3818 PetscBool isseqaij; 3819 3820 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3821 IS tis; 3822 3823 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3824 ierr = ISSort(tis);CHKERRQ(ierr); 3825 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3826 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3827 } else { 3828 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3829 } 3830 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3831 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3832 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3833 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3834 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3835 } 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3837 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3838 } 3839 3840 /* Matrix of coarse basis functions (local) */ 3841 if (pcbddc->coarse_phi_B) { 3842 PetscInt on_B,on_primal,on_D=n_D; 3843 if (pcbddc->coarse_phi_D) { 3844 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3845 } 3846 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3847 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3848 PetscScalar *marray; 3849 3850 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3851 ierr = PetscFree(marray);CHKERRQ(ierr); 3852 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3853 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3854 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3855 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3856 } 3857 } 3858 3859 if (!pcbddc->coarse_phi_B) { 3860 PetscScalar *marr; 3861 3862 /* memory size */ 3863 n = n_B*pcbddc->local_primal_size; 3864 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3865 if (!pcbddc->symmetric_primal) n *= 2; 3866 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3868 marr += n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3870 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3871 marr += n_D*pcbddc->local_primal_size; 3872 } 3873 if (!pcbddc->symmetric_primal) { 3874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3875 marr += n_B*pcbddc->local_primal_size; 3876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3878 } 3879 } else { 3880 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3881 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3882 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3883 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3884 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3885 } 3886 } 3887 } 3888 3889 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3890 p0_lidx_I = NULL; 3891 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3892 const PetscInt *idxs; 3893 3894 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3895 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3896 for (i=0;i<pcbddc->benign_n;i++) { 3897 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3898 } 3899 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 } 3901 3902 /* vertices */ 3903 if (n_vertices) { 3904 PetscBool restoreavr = PETSC_FALSE; 3905 3906 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3907 3908 if (n_R) { 3909 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3910 PetscBLASInt B_N,B_one = 1; 3911 PetscScalar *x,*y; 3912 3913 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3914 if (need_benign_correction) { 3915 ISLocalToGlobalMapping RtoN; 3916 IS is_p0; 3917 PetscInt *idxs_p0,n; 3918 3919 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3920 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3921 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3922 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3923 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3925 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3926 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3927 } 3928 3929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3930 if (!sparserhs || need_benign_correction) { 3931 if (lda_rhs == n_R) { 3932 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3933 } else { 3934 PetscScalar *av,*array; 3935 const PetscInt *xadj,*adjncy; 3936 PetscInt n; 3937 PetscBool flg_row; 3938 3939 array = work+lda_rhs*n_vertices; 3940 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3941 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3942 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3943 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3944 for (i=0;i<n;i++) { 3945 PetscInt j; 3946 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3947 } 3948 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3949 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3950 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3951 } 3952 if (need_benign_correction) { 3953 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3954 PetscScalar *marr; 3955 3956 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3957 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3958 3959 | 0 0 0 | (V) 3960 L = | 0 0 -1 | (P-p0) 3961 | 0 0 -1 | (p0) 3962 3963 */ 3964 for (i=0;i<reuse_solver->benign_n;i++) { 3965 const PetscScalar *vals; 3966 const PetscInt *idxs,*idxs_zero; 3967 PetscInt n,j,nz; 3968 3969 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3970 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3971 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3972 for (j=0;j<n;j++) { 3973 PetscScalar val = vals[j]; 3974 PetscInt k,col = idxs[j]; 3975 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3976 } 3977 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3978 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3979 } 3980 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3981 } 3982 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3983 Brhs = A_RV; 3984 } else { 3985 Mat tA_RVT,A_RVT; 3986 3987 if (!pcbddc->symmetric_primal) { 3988 /* A_RV already scaled by -1 */ 3989 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3990 } else { 3991 restoreavr = PETSC_TRUE; 3992 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3993 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3994 A_RVT = A_VR; 3995 } 3996 if (lda_rhs != n_R) { 3997 PetscScalar *aa; 3998 PetscInt r,*ii,*jj; 3999 PetscBool done; 4000 4001 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4002 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4003 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4004 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4005 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4006 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4007 } else { 4008 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4009 tA_RVT = A_RVT; 4010 } 4011 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4012 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4013 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4014 } 4015 if (F) { 4016 /* need to correct the rhs */ 4017 if (need_benign_correction) { 4018 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4019 PetscScalar *marr; 4020 4021 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4022 if (lda_rhs != n_R) { 4023 for (i=0;i<n_vertices;i++) { 4024 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4025 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4026 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4027 } 4028 } else { 4029 for (i=0;i<n_vertices;i++) { 4030 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4031 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4032 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4033 } 4034 } 4035 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4036 } 4037 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4038 if (restoreavr) { 4039 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4040 } 4041 /* need to correct the solution */ 4042 if (need_benign_correction) { 4043 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4044 PetscScalar *marr; 4045 4046 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4047 if (lda_rhs != n_R) { 4048 for (i=0;i<n_vertices;i++) { 4049 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4050 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4051 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4052 } 4053 } else { 4054 for (i=0;i<n_vertices;i++) { 4055 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4056 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4057 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4058 } 4059 } 4060 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4061 } 4062 } else { 4063 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4064 for (i=0;i<n_vertices;i++) { 4065 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4066 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4067 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4068 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4069 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4070 } 4071 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4072 } 4073 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4074 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4075 /* S_VV and S_CV */ 4076 if (n_constraints) { 4077 Mat B; 4078 4079 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4080 for (i=0;i<n_vertices;i++) { 4081 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4082 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4083 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4085 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4086 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4087 } 4088 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4089 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4090 ierr = MatDestroy(&B);CHKERRQ(ierr); 4091 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4092 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4093 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4094 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4095 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4096 ierr = MatDestroy(&B);CHKERRQ(ierr); 4097 } 4098 if (lda_rhs != n_R) { 4099 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4100 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4101 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4102 } 4103 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4104 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4105 if (need_benign_correction) { 4106 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4107 PetscScalar *marr,*sums; 4108 4109 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4110 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4111 for (i=0;i<reuse_solver->benign_n;i++) { 4112 const PetscScalar *vals; 4113 const PetscInt *idxs,*idxs_zero; 4114 PetscInt n,j,nz; 4115 4116 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4117 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4118 for (j=0;j<n_vertices;j++) { 4119 PetscInt k; 4120 sums[j] = 0.; 4121 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4122 } 4123 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4124 for (j=0;j<n;j++) { 4125 PetscScalar val = vals[j]; 4126 PetscInt k; 4127 for (k=0;k<n_vertices;k++) { 4128 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4129 } 4130 } 4131 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4132 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4133 } 4134 ierr = PetscFree(sums);CHKERRQ(ierr); 4135 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4136 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4137 } 4138 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4139 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4140 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4141 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4142 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4143 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4144 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4145 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4146 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4147 } else { 4148 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4149 } 4150 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4151 4152 /* coarse basis functions */ 4153 for (i=0;i<n_vertices;i++) { 4154 PetscScalar *y; 4155 4156 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4157 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4158 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4159 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4160 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4161 y[n_B*i+idx_V_B[i]] = 1.0; 4162 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4163 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4164 4165 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4166 PetscInt j; 4167 4168 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4169 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4170 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4171 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4172 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4173 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4174 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4175 } 4176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4177 } 4178 /* if n_R == 0 the object is not destroyed */ 4179 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4180 } 4181 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4182 4183 if (n_constraints) { 4184 Mat B; 4185 4186 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4187 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4188 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4189 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4190 if (n_vertices) { 4191 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4192 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4193 } else { 4194 Mat S_VCt; 4195 4196 if (lda_rhs != n_R) { 4197 ierr = MatDestroy(&B);CHKERRQ(ierr); 4198 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4199 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4200 } 4201 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4202 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4203 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4204 } 4205 } 4206 ierr = MatDestroy(&B);CHKERRQ(ierr); 4207 /* coarse basis functions */ 4208 for (i=0;i<n_constraints;i++) { 4209 PetscScalar *y; 4210 4211 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4212 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4213 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4214 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4215 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4216 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4217 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4218 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4219 PetscInt j; 4220 4221 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4222 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4223 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4224 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4225 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4226 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4227 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4228 } 4229 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4230 } 4231 } 4232 if (n_constraints) { 4233 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4234 } 4235 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4236 4237 /* coarse matrix entries relative to B_0 */ 4238 if (pcbddc->benign_n) { 4239 Mat B0_B,B0_BPHI; 4240 IS is_dummy; 4241 PetscScalar *data; 4242 PetscInt j; 4243 4244 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4245 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4246 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4247 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4248 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4249 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4250 for (j=0;j<pcbddc->benign_n;j++) { 4251 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4252 for (i=0;i<pcbddc->local_primal_size;i++) { 4253 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4254 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4255 } 4256 } 4257 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4258 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4259 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4260 } 4261 4262 /* compute other basis functions for non-symmetric problems */ 4263 if (!pcbddc->symmetric_primal) { 4264 Mat B_V=NULL,B_C=NULL; 4265 PetscScalar *marray; 4266 4267 if (n_constraints) { 4268 Mat S_CCT,C_CRT; 4269 4270 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4271 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4272 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4273 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4274 if (n_vertices) { 4275 Mat S_VCT; 4276 4277 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4278 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4279 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4280 } 4281 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4282 } else { 4283 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4284 } 4285 if (n_vertices && n_R) { 4286 PetscScalar *av,*marray; 4287 const PetscInt *xadj,*adjncy; 4288 PetscInt n; 4289 PetscBool flg_row; 4290 4291 /* B_V = B_V - A_VR^T */ 4292 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4293 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4294 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4295 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4296 for (i=0;i<n;i++) { 4297 PetscInt j; 4298 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4299 } 4300 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4301 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4302 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4303 } 4304 4305 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4306 if (n_vertices) { 4307 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4308 for (i=0;i<n_vertices;i++) { 4309 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4310 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4311 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4312 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4313 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4314 } 4315 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4316 } 4317 if (B_C) { 4318 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4319 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4320 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4321 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4322 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4323 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4324 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4325 } 4326 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4327 } 4328 /* coarse basis functions */ 4329 for (i=0;i<pcbddc->local_primal_size;i++) { 4330 PetscScalar *y; 4331 4332 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4333 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4334 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4335 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4336 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4337 if (i<n_vertices) { 4338 y[n_B*i+idx_V_B[i]] = 1.0; 4339 } 4340 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4341 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4342 4343 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4344 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4345 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4346 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4347 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4348 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4349 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4350 } 4351 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4352 } 4353 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4354 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4355 } 4356 4357 /* free memory */ 4358 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4359 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4360 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4361 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4362 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4363 ierr = PetscFree(work);CHKERRQ(ierr); 4364 if (n_vertices) { 4365 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4366 } 4367 if (n_constraints) { 4368 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4369 } 4370 /* Checking coarse_sub_mat and coarse basis functios */ 4371 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4372 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4373 if (pcbddc->dbg_flag) { 4374 Mat coarse_sub_mat; 4375 Mat AUXMAT,TM1,TM2,TM3,TM4; 4376 Mat coarse_phi_D,coarse_phi_B; 4377 Mat coarse_psi_D,coarse_psi_B; 4378 Mat A_II,A_BB,A_IB,A_BI; 4379 Mat C_B,CPHI; 4380 IS is_dummy; 4381 Vec mones; 4382 MatType checkmattype=MATSEQAIJ; 4383 PetscReal real_value; 4384 4385 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4386 Mat A; 4387 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4388 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4389 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4390 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4391 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4392 ierr = MatDestroy(&A);CHKERRQ(ierr); 4393 } else { 4394 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4395 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4396 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4397 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4398 } 4399 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4400 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4401 if (!pcbddc->symmetric_primal) { 4402 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4403 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4404 } 4405 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4406 4407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4408 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4410 if (!pcbddc->symmetric_primal) { 4411 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4412 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4413 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4414 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4415 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4416 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4417 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4418 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4419 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4420 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4421 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4422 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4423 } else { 4424 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4425 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4426 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4427 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4428 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4429 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4430 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4431 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4432 } 4433 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4434 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4435 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4436 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4437 if (pcbddc->benign_n) { 4438 Mat B0_B,B0_BPHI; 4439 PetscScalar *data,*data2; 4440 PetscInt j; 4441 4442 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4443 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4444 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4445 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4446 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4447 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4448 for (j=0;j<pcbddc->benign_n;j++) { 4449 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4450 for (i=0;i<pcbddc->local_primal_size;i++) { 4451 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4452 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4453 } 4454 } 4455 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4456 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4457 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4458 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4459 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4460 } 4461 #if 0 4462 { 4463 PetscViewer viewer; 4464 char filename[256]; 4465 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4466 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4467 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4468 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4469 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4470 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4471 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4472 if (pcbddc->coarse_phi_B) { 4473 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4474 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4475 } 4476 if (pcbddc->coarse_phi_D) { 4477 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4478 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4479 } 4480 if (pcbddc->coarse_psi_B) { 4481 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4482 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4483 } 4484 if (pcbddc->coarse_psi_D) { 4485 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4486 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4487 } 4488 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4489 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4490 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4491 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4492 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4493 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4494 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4495 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4496 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4497 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4498 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4499 } 4500 #endif 4501 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4502 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4503 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4505 4506 /* check constraints */ 4507 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4508 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4509 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4510 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4511 } else { 4512 PetscScalar *data; 4513 Mat tmat; 4514 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4515 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4516 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4517 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4518 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4519 } 4520 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4521 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4522 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4523 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4525 if (!pcbddc->symmetric_primal) { 4526 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4527 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4528 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4529 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4530 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4531 } 4532 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4533 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4534 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4535 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4536 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4537 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4538 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4539 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4540 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4541 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4542 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4543 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4544 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4545 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4546 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4547 if (!pcbddc->symmetric_primal) { 4548 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4549 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4550 } 4551 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4552 } 4553 /* get back data */ 4554 *coarse_submat_vals_n = coarse_submat_vals; 4555 PetscFunctionReturn(0); 4556 } 4557 4558 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4559 { 4560 Mat *work_mat; 4561 IS isrow_s,iscol_s; 4562 PetscBool rsorted,csorted; 4563 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4564 PetscErrorCode ierr; 4565 4566 PetscFunctionBegin; 4567 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4568 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4569 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4570 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4571 4572 if (!rsorted) { 4573 const PetscInt *idxs; 4574 PetscInt *idxs_sorted,i; 4575 4576 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4577 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4578 for (i=0;i<rsize;i++) { 4579 idxs_perm_r[i] = i; 4580 } 4581 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4582 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4583 for (i=0;i<rsize;i++) { 4584 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4585 } 4586 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4587 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4588 } else { 4589 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4590 isrow_s = isrow; 4591 } 4592 4593 if (!csorted) { 4594 if (isrow == iscol) { 4595 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4596 iscol_s = isrow_s; 4597 } else { 4598 const PetscInt *idxs; 4599 PetscInt *idxs_sorted,i; 4600 4601 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4602 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4603 for (i=0;i<csize;i++) { 4604 idxs_perm_c[i] = i; 4605 } 4606 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4607 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4608 for (i=0;i<csize;i++) { 4609 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4610 } 4611 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4612 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4613 } 4614 } else { 4615 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4616 iscol_s = iscol; 4617 } 4618 4619 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4620 4621 if (!rsorted || !csorted) { 4622 Mat new_mat; 4623 IS is_perm_r,is_perm_c; 4624 4625 if (!rsorted) { 4626 PetscInt *idxs_r,i; 4627 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4628 for (i=0;i<rsize;i++) { 4629 idxs_r[idxs_perm_r[i]] = i; 4630 } 4631 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4632 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4633 } else { 4634 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4635 } 4636 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4637 4638 if (!csorted) { 4639 if (isrow_s == iscol_s) { 4640 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4641 is_perm_c = is_perm_r; 4642 } else { 4643 PetscInt *idxs_c,i; 4644 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4645 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4646 for (i=0;i<csize;i++) { 4647 idxs_c[idxs_perm_c[i]] = i; 4648 } 4649 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4650 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4651 } 4652 } else { 4653 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4654 } 4655 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4656 4657 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4658 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4659 work_mat[0] = new_mat; 4660 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4661 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4662 } 4663 4664 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4665 *B = work_mat[0]; 4666 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4667 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4668 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4669 PetscFunctionReturn(0); 4670 } 4671 4672 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4673 { 4674 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4675 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4676 Mat new_mat,lA; 4677 IS is_local,is_global; 4678 PetscInt local_size; 4679 PetscBool isseqaij; 4680 PetscErrorCode ierr; 4681 4682 PetscFunctionBegin; 4683 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4684 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4685 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4686 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4687 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4688 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4689 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4690 4691 /* check */ 4692 if (pcbddc->dbg_flag) { 4693 Vec x,x_change; 4694 PetscReal error; 4695 4696 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4697 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4698 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4699 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4700 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4701 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4702 if (!pcbddc->change_interior) { 4703 const PetscScalar *x,*y,*v; 4704 PetscReal lerror = 0.; 4705 PetscInt i; 4706 4707 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4708 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4709 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4710 for (i=0;i<local_size;i++) 4711 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4712 lerror = PetscAbsScalar(x[i]-y[i]); 4713 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4714 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4715 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4716 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4717 if (error > PETSC_SMALL) { 4718 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4719 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4720 } else { 4721 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4722 } 4723 } 4724 } 4725 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4726 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4727 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4728 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4729 if (error > PETSC_SMALL) { 4730 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4731 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4732 } else { 4733 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4734 } 4735 } 4736 ierr = VecDestroy(&x);CHKERRQ(ierr); 4737 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4738 } 4739 4740 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4741 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4742 4743 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4744 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4745 if (isseqaij) { 4746 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4747 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4748 if (lA) { 4749 Mat work; 4750 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4751 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4752 ierr = MatDestroy(&work);CHKERRQ(ierr); 4753 } 4754 } else { 4755 Mat work_mat; 4756 4757 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4758 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4759 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4760 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4761 if (lA) { 4762 Mat work; 4763 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4764 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4765 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4766 ierr = MatDestroy(&work);CHKERRQ(ierr); 4767 } 4768 } 4769 if (matis->A->symmetric_set) { 4770 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4771 #if !defined(PETSC_USE_COMPLEX) 4772 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4773 #endif 4774 } 4775 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4776 PetscFunctionReturn(0); 4777 } 4778 4779 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4780 { 4781 PC_IS* pcis = (PC_IS*)(pc->data); 4782 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4783 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4784 PetscInt *idx_R_local=NULL; 4785 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4786 PetscInt vbs,bs; 4787 PetscBT bitmask=NULL; 4788 PetscErrorCode ierr; 4789 4790 PetscFunctionBegin; 4791 /* 4792 No need to setup local scatters if 4793 - primal space is unchanged 4794 AND 4795 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4796 AND 4797 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4798 */ 4799 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4800 PetscFunctionReturn(0); 4801 } 4802 /* destroy old objects */ 4803 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4804 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4805 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4806 /* Set Non-overlapping dimensions */ 4807 n_B = pcis->n_B; 4808 n_D = pcis->n - n_B; 4809 n_vertices = pcbddc->n_vertices; 4810 4811 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4812 4813 /* create auxiliary bitmask and allocate workspace */ 4814 if (!sub_schurs || !sub_schurs->reuse_solver) { 4815 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4816 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4817 for (i=0;i<n_vertices;i++) { 4818 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4819 } 4820 4821 for (i=0, n_R=0; i<pcis->n; i++) { 4822 if (!PetscBTLookup(bitmask,i)) { 4823 idx_R_local[n_R++] = i; 4824 } 4825 } 4826 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4827 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4828 4829 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4830 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4831 } 4832 4833 /* Block code */ 4834 vbs = 1; 4835 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4836 if (bs>1 && !(n_vertices%bs)) { 4837 PetscBool is_blocked = PETSC_TRUE; 4838 PetscInt *vary; 4839 if (!sub_schurs || !sub_schurs->reuse_solver) { 4840 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4841 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4842 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4843 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4844 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4845 for (i=0; i<pcis->n/bs; i++) { 4846 if (vary[i]!=0 && vary[i]!=bs) { 4847 is_blocked = PETSC_FALSE; 4848 break; 4849 } 4850 } 4851 ierr = PetscFree(vary);CHKERRQ(ierr); 4852 } else { 4853 /* Verify directly the R set */ 4854 for (i=0; i<n_R/bs; i++) { 4855 PetscInt j,node=idx_R_local[bs*i]; 4856 for (j=1; j<bs; j++) { 4857 if (node != idx_R_local[bs*i+j]-j) { 4858 is_blocked = PETSC_FALSE; 4859 break; 4860 } 4861 } 4862 } 4863 } 4864 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4865 vbs = bs; 4866 for (i=0;i<n_R/vbs;i++) { 4867 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4868 } 4869 } 4870 } 4871 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4872 if (sub_schurs && sub_schurs->reuse_solver) { 4873 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4874 4875 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4876 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4877 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4878 reuse_solver->is_R = pcbddc->is_R_local; 4879 } else { 4880 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4881 } 4882 4883 /* print some info if requested */ 4884 if (pcbddc->dbg_flag) { 4885 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4886 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4887 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4888 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4889 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4890 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4891 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4892 } 4893 4894 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4895 if (!sub_schurs || !sub_schurs->reuse_solver) { 4896 IS is_aux1,is_aux2; 4897 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4898 4899 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4900 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4901 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4902 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4903 for (i=0; i<n_D; i++) { 4904 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4905 } 4906 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4907 for (i=0, j=0; i<n_R; i++) { 4908 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4909 aux_array1[j++] = i; 4910 } 4911 } 4912 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4913 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4914 for (i=0, j=0; i<n_B; i++) { 4915 if (!PetscBTLookup(bitmask,is_indices[i])) { 4916 aux_array2[j++] = i; 4917 } 4918 } 4919 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4920 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4921 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4922 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4923 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4924 4925 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4926 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4927 for (i=0, j=0; i<n_R; i++) { 4928 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4929 aux_array1[j++] = i; 4930 } 4931 } 4932 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4933 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4934 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4935 } 4936 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4937 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4938 } else { 4939 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4940 IS tis; 4941 PetscInt schur_size; 4942 4943 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4944 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4945 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4946 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4947 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4948 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4949 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4950 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4951 } 4952 } 4953 PetscFunctionReturn(0); 4954 } 4955 4956 4957 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4958 { 4959 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4960 PC_IS *pcis = (PC_IS*)pc->data; 4961 PC pc_temp; 4962 Mat A_RR; 4963 MatReuse reuse; 4964 PetscScalar m_one = -1.0; 4965 PetscReal value; 4966 PetscInt n_D,n_R; 4967 PetscBool check_corr,issbaij; 4968 PetscErrorCode ierr; 4969 /* prefixes stuff */ 4970 char dir_prefix[256],neu_prefix[256],str_level[16]; 4971 size_t len; 4972 4973 PetscFunctionBegin; 4974 4975 /* compute prefixes */ 4976 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4977 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4978 if (!pcbddc->current_level) { 4979 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4980 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4981 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4982 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4983 } else { 4984 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4985 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4986 len -= 15; /* remove "pc_bddc_coarse_" */ 4987 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4988 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4989 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4990 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4991 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4992 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4993 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4994 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4995 } 4996 4997 /* DIRICHLET PROBLEM */ 4998 if (dirichlet) { 4999 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5000 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5001 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5002 if (pcbddc->dbg_flag) { 5003 Mat A_IIn; 5004 5005 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5006 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5007 pcis->A_II = A_IIn; 5008 } 5009 } 5010 if (pcbddc->local_mat->symmetric_set) { 5011 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5012 } 5013 /* Matrix for Dirichlet problem is pcis->A_II */ 5014 n_D = pcis->n - pcis->n_B; 5015 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5016 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5017 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5018 /* default */ 5019 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5020 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5021 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5022 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5023 if (issbaij) { 5024 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5025 } else { 5026 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5027 } 5028 /* Allow user's customization */ 5029 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5030 } 5031 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5032 if (sub_schurs && sub_schurs->reuse_solver) { 5033 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5034 5035 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5036 } 5037 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5038 if (!n_D) { 5039 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5040 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5041 } 5042 /* Set Up KSP for Dirichlet problem of BDDC */ 5043 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5044 /* set ksp_D into pcis data */ 5045 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5046 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5047 pcis->ksp_D = pcbddc->ksp_D; 5048 } 5049 5050 /* NEUMANN PROBLEM */ 5051 A_RR = 0; 5052 if (neumann) { 5053 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5054 PetscInt ibs,mbs; 5055 PetscBool issbaij, reuse_neumann_solver; 5056 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5057 5058 reuse_neumann_solver = PETSC_FALSE; 5059 if (sub_schurs && sub_schurs->reuse_solver) { 5060 IS iP; 5061 5062 reuse_neumann_solver = PETSC_TRUE; 5063 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5064 if (iP) reuse_neumann_solver = PETSC_FALSE; 5065 } 5066 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5067 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5068 if (pcbddc->ksp_R) { /* already created ksp */ 5069 PetscInt nn_R; 5070 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5071 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5072 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5073 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5074 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5075 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5076 reuse = MAT_INITIAL_MATRIX; 5077 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5078 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5079 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5080 reuse = MAT_INITIAL_MATRIX; 5081 } else { /* safe to reuse the matrix */ 5082 reuse = MAT_REUSE_MATRIX; 5083 } 5084 } 5085 /* last check */ 5086 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5087 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5088 reuse = MAT_INITIAL_MATRIX; 5089 } 5090 } else { /* first time, so we need to create the matrix */ 5091 reuse = MAT_INITIAL_MATRIX; 5092 } 5093 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5094 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5095 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5096 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5097 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5098 if (matis->A == pcbddc->local_mat) { 5099 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5100 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5101 } else { 5102 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5103 } 5104 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5105 if (matis->A == pcbddc->local_mat) { 5106 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5107 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5108 } else { 5109 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5110 } 5111 } 5112 /* extract A_RR */ 5113 if (reuse_neumann_solver) { 5114 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5115 5116 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5117 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5118 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5119 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5120 } else { 5121 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5122 } 5123 } else { 5124 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5125 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5126 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5127 } 5128 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5129 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5130 } 5131 if (pcbddc->local_mat->symmetric_set) { 5132 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5133 } 5134 if (!pcbddc->ksp_R) { /* create object if not present */ 5135 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5137 /* default */ 5138 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5139 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5140 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5141 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5142 if (issbaij) { 5143 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5144 } else { 5145 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5146 } 5147 /* Allow user's customization */ 5148 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5149 } 5150 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5151 if (!n_R) { 5152 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5153 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5154 } 5155 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5156 /* Reuse solver if it is present */ 5157 if (reuse_neumann_solver) { 5158 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5159 5160 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5161 } 5162 /* Set Up KSP for Neumann problem of BDDC */ 5163 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5164 } 5165 5166 if (pcbddc->dbg_flag) { 5167 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5168 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5170 } 5171 5172 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5173 check_corr = PETSC_FALSE; 5174 if (pcbddc->NullSpace_corr[0]) { 5175 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5176 } 5177 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5178 check_corr = PETSC_TRUE; 5179 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5180 } 5181 if (neumann && pcbddc->NullSpace_corr[2]) { 5182 check_corr = PETSC_TRUE; 5183 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5184 } 5185 /* check Dirichlet and Neumann solvers */ 5186 if (pcbddc->dbg_flag) { 5187 if (dirichlet) { /* Dirichlet */ 5188 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5189 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5190 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5191 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5192 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5193 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5194 if (check_corr) { 5195 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5196 } 5197 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5198 } 5199 if (neumann) { /* Neumann */ 5200 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5201 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5202 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5203 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5204 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5205 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5206 if (check_corr) { 5207 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5208 } 5209 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5210 } 5211 } 5212 /* free Neumann problem's matrix */ 5213 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5214 PetscFunctionReturn(0); 5215 } 5216 5217 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5218 { 5219 PetscErrorCode ierr; 5220 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5221 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5222 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5223 5224 PetscFunctionBegin; 5225 if (!reuse_solver) { 5226 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5227 } 5228 if (!pcbddc->switch_static) { 5229 if (applytranspose && pcbddc->local_auxmat1) { 5230 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5231 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5232 } 5233 if (!reuse_solver) { 5234 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5235 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5236 } else { 5237 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5238 5239 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5241 } 5242 } else { 5243 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5244 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5245 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5246 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5247 if (applytranspose && pcbddc->local_auxmat1) { 5248 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5249 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5250 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5251 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5252 } 5253 } 5254 if (!reuse_solver || pcbddc->switch_static) { 5255 if (applytranspose) { 5256 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5257 } else { 5258 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5259 } 5260 } else { 5261 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5262 5263 if (applytranspose) { 5264 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5265 } else { 5266 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5267 } 5268 } 5269 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5270 if (!pcbddc->switch_static) { 5271 if (!reuse_solver) { 5272 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5273 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5274 } else { 5275 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5276 5277 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5278 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5279 } 5280 if (!applytranspose && pcbddc->local_auxmat1) { 5281 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5282 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5283 } 5284 } else { 5285 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5286 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5287 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5288 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5289 if (!applytranspose && pcbddc->local_auxmat1) { 5290 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5291 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5292 } 5293 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5294 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5295 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5296 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5297 } 5298 PetscFunctionReturn(0); 5299 } 5300 5301 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5302 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5303 { 5304 PetscErrorCode ierr; 5305 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5306 PC_IS* pcis = (PC_IS*) (pc->data); 5307 const PetscScalar zero = 0.0; 5308 5309 PetscFunctionBegin; 5310 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5311 if (!pcbddc->benign_apply_coarse_only) { 5312 if (applytranspose) { 5313 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5314 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5315 } else { 5316 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5317 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5318 } 5319 } else { 5320 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5321 } 5322 5323 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5324 if (pcbddc->benign_n) { 5325 PetscScalar *array; 5326 PetscInt j; 5327 5328 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5329 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5330 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5331 } 5332 5333 /* start communications from local primal nodes to rhs of coarse solver */ 5334 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5335 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5336 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5337 5338 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5339 if (pcbddc->coarse_ksp) { 5340 Mat coarse_mat; 5341 Vec rhs,sol; 5342 MatNullSpace nullsp; 5343 PetscBool isbddc = PETSC_FALSE; 5344 5345 if (pcbddc->benign_have_null) { 5346 PC coarse_pc; 5347 5348 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5349 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5350 /* we need to propagate to coarser levels the need for a possible benign correction */ 5351 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5352 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5353 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5354 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5355 } 5356 } 5357 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5358 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5359 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5360 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5361 if (nullsp) { 5362 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5363 } 5364 if (applytranspose) { 5365 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5366 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5367 } else { 5368 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5369 PC coarse_pc; 5370 5371 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5372 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5373 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5374 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5375 } else { 5376 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5377 } 5378 } 5379 /* we don't need the benign correction at coarser levels anymore */ 5380 if (pcbddc->benign_have_null && isbddc) { 5381 PC coarse_pc; 5382 PC_BDDC* coarsepcbddc; 5383 5384 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5385 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5386 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5387 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5388 } 5389 if (nullsp) { 5390 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5391 } 5392 } 5393 5394 /* Local solution on R nodes */ 5395 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5396 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5397 } 5398 /* communications from coarse sol to local primal nodes */ 5399 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5400 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5401 5402 /* Sum contributions from the two levels */ 5403 if (!pcbddc->benign_apply_coarse_only) { 5404 if (applytranspose) { 5405 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5406 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5407 } else { 5408 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5409 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5410 } 5411 /* store p0 */ 5412 if (pcbddc->benign_n) { 5413 PetscScalar *array; 5414 PetscInt j; 5415 5416 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5417 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5418 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5419 } 5420 } else { /* expand the coarse solution */ 5421 if (applytranspose) { 5422 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5423 } else { 5424 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5425 } 5426 } 5427 PetscFunctionReturn(0); 5428 } 5429 5430 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5431 { 5432 PetscErrorCode ierr; 5433 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5434 PetscScalar *array; 5435 Vec from,to; 5436 5437 PetscFunctionBegin; 5438 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5439 from = pcbddc->coarse_vec; 5440 to = pcbddc->vec1_P; 5441 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5442 Vec tvec; 5443 5444 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5445 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5446 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5447 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5448 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5449 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5450 } 5451 } else { /* from local to global -> put data in coarse right hand side */ 5452 from = pcbddc->vec1_P; 5453 to = pcbddc->coarse_vec; 5454 } 5455 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5456 PetscFunctionReturn(0); 5457 } 5458 5459 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5460 { 5461 PetscErrorCode ierr; 5462 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5463 PetscScalar *array; 5464 Vec from,to; 5465 5466 PetscFunctionBegin; 5467 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5468 from = pcbddc->coarse_vec; 5469 to = pcbddc->vec1_P; 5470 } else { /* from local to global -> put data in coarse right hand side */ 5471 from = pcbddc->vec1_P; 5472 to = pcbddc->coarse_vec; 5473 } 5474 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5475 if (smode == SCATTER_FORWARD) { 5476 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5477 Vec tvec; 5478 5479 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5480 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5481 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5482 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5483 } 5484 } else { 5485 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5486 ierr = VecResetArray(from);CHKERRQ(ierr); 5487 } 5488 } 5489 PetscFunctionReturn(0); 5490 } 5491 5492 /* uncomment for testing purposes */ 5493 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5494 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5495 { 5496 PetscErrorCode ierr; 5497 PC_IS* pcis = (PC_IS*)(pc->data); 5498 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5499 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5500 /* one and zero */ 5501 PetscScalar one=1.0,zero=0.0; 5502 /* space to store constraints and their local indices */ 5503 PetscScalar *constraints_data; 5504 PetscInt *constraints_idxs,*constraints_idxs_B; 5505 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5506 PetscInt *constraints_n; 5507 /* iterators */ 5508 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5509 /* BLAS integers */ 5510 PetscBLASInt lwork,lierr; 5511 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5512 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5513 /* reuse */ 5514 PetscInt olocal_primal_size,olocal_primal_size_cc; 5515 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5516 /* change of basis */ 5517 PetscBool qr_needed; 5518 PetscBT change_basis,qr_needed_idx; 5519 /* auxiliary stuff */ 5520 PetscInt *nnz,*is_indices; 5521 PetscInt ncc; 5522 /* some quantities */ 5523 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5524 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5525 5526 PetscFunctionBegin; 5527 /* Destroy Mat objects computed previously */ 5528 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5529 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5530 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5531 /* save info on constraints from previous setup (if any) */ 5532 olocal_primal_size = pcbddc->local_primal_size; 5533 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5534 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5535 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5536 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5537 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5538 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5539 5540 if (!pcbddc->adaptive_selection) { 5541 IS ISForVertices,*ISForFaces,*ISForEdges; 5542 MatNullSpace nearnullsp; 5543 const Vec *nearnullvecs; 5544 Vec *localnearnullsp; 5545 PetscScalar *array; 5546 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5547 PetscBool nnsp_has_cnst; 5548 /* LAPACK working arrays for SVD or POD */ 5549 PetscBool skip_lapack,boolforchange; 5550 PetscScalar *work; 5551 PetscReal *singular_vals; 5552 #if defined(PETSC_USE_COMPLEX) 5553 PetscReal *rwork; 5554 #endif 5555 #if defined(PETSC_MISSING_LAPACK_GESVD) 5556 PetscScalar *temp_basis,*correlation_mat; 5557 #else 5558 PetscBLASInt dummy_int=1; 5559 PetscScalar dummy_scalar=1.; 5560 #endif 5561 5562 /* Get index sets for faces, edges and vertices from graph */ 5563 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5564 /* print some info */ 5565 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5566 PetscInt nv; 5567 5568 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5569 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5570 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5576 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5577 } 5578 5579 /* free unneeded index sets */ 5580 if (!pcbddc->use_vertices) { 5581 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5582 } 5583 if (!pcbddc->use_edges) { 5584 for (i=0;i<n_ISForEdges;i++) { 5585 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5586 } 5587 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5588 n_ISForEdges = 0; 5589 } 5590 if (!pcbddc->use_faces) { 5591 for (i=0;i<n_ISForFaces;i++) { 5592 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5593 } 5594 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5595 n_ISForFaces = 0; 5596 } 5597 5598 /* check if near null space is attached to global mat */ 5599 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5600 if (nearnullsp) { 5601 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5602 /* remove any stored info */ 5603 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5604 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5605 /* store information for BDDC solver reuse */ 5606 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5607 pcbddc->onearnullspace = nearnullsp; 5608 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5609 for (i=0;i<nnsp_size;i++) { 5610 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5611 } 5612 } else { /* if near null space is not provided BDDC uses constants by default */ 5613 nnsp_size = 0; 5614 nnsp_has_cnst = PETSC_TRUE; 5615 } 5616 /* get max number of constraints on a single cc */ 5617 max_constraints = nnsp_size; 5618 if (nnsp_has_cnst) max_constraints++; 5619 5620 /* 5621 Evaluate maximum storage size needed by the procedure 5622 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5623 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5624 There can be multiple constraints per connected component 5625 */ 5626 n_vertices = 0; 5627 if (ISForVertices) { 5628 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5629 } 5630 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5631 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5632 5633 total_counts = n_ISForFaces+n_ISForEdges; 5634 total_counts *= max_constraints; 5635 total_counts += n_vertices; 5636 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5637 5638 total_counts = 0; 5639 max_size_of_constraint = 0; 5640 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5641 IS used_is; 5642 if (i<n_ISForEdges) { 5643 used_is = ISForEdges[i]; 5644 } else { 5645 used_is = ISForFaces[i-n_ISForEdges]; 5646 } 5647 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5648 total_counts += j; 5649 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5650 } 5651 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5652 5653 /* get local part of global near null space vectors */ 5654 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5655 for (k=0;k<nnsp_size;k++) { 5656 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5657 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5658 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5659 } 5660 5661 /* whether or not to skip lapack calls */ 5662 skip_lapack = PETSC_TRUE; 5663 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5664 5665 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5666 if (!skip_lapack) { 5667 PetscScalar temp_work; 5668 5669 #if defined(PETSC_MISSING_LAPACK_GESVD) 5670 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5671 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5672 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5673 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5674 #if defined(PETSC_USE_COMPLEX) 5675 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5676 #endif 5677 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5678 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5679 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5680 lwork = -1; 5681 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5682 #if !defined(PETSC_USE_COMPLEX) 5683 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5684 #else 5685 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5686 #endif 5687 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5688 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5689 #else /* on missing GESVD */ 5690 /* SVD */ 5691 PetscInt max_n,min_n; 5692 max_n = max_size_of_constraint; 5693 min_n = max_constraints; 5694 if (max_size_of_constraint < max_constraints) { 5695 min_n = max_size_of_constraint; 5696 max_n = max_constraints; 5697 } 5698 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5699 #if defined(PETSC_USE_COMPLEX) 5700 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5701 #endif 5702 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5703 lwork = -1; 5704 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5705 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5706 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5707 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5708 #if !defined(PETSC_USE_COMPLEX) 5709 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5710 #else 5711 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5712 #endif 5713 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5714 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5715 #endif /* on missing GESVD */ 5716 /* Allocate optimal workspace */ 5717 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5718 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5719 } 5720 /* Now we can loop on constraining sets */ 5721 total_counts = 0; 5722 constraints_idxs_ptr[0] = 0; 5723 constraints_data_ptr[0] = 0; 5724 /* vertices */ 5725 if (n_vertices) { 5726 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5727 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5728 for (i=0;i<n_vertices;i++) { 5729 constraints_n[total_counts] = 1; 5730 constraints_data[total_counts] = 1.0; 5731 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5732 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5733 total_counts++; 5734 } 5735 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5736 n_vertices = total_counts; 5737 } 5738 5739 /* edges and faces */ 5740 total_counts_cc = total_counts; 5741 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5742 IS used_is; 5743 PetscBool idxs_copied = PETSC_FALSE; 5744 5745 if (ncc<n_ISForEdges) { 5746 used_is = ISForEdges[ncc]; 5747 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5748 } else { 5749 used_is = ISForFaces[ncc-n_ISForEdges]; 5750 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5751 } 5752 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5753 5754 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5755 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5756 /* change of basis should not be performed on local periodic nodes */ 5757 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5758 if (nnsp_has_cnst) { 5759 PetscScalar quad_value; 5760 5761 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5762 idxs_copied = PETSC_TRUE; 5763 5764 if (!pcbddc->use_nnsp_true) { 5765 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5766 } else { 5767 quad_value = 1.0; 5768 } 5769 for (j=0;j<size_of_constraint;j++) { 5770 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5771 } 5772 temp_constraints++; 5773 total_counts++; 5774 } 5775 for (k=0;k<nnsp_size;k++) { 5776 PetscReal real_value; 5777 PetscScalar *ptr_to_data; 5778 5779 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5780 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5781 for (j=0;j<size_of_constraint;j++) { 5782 ptr_to_data[j] = array[is_indices[j]]; 5783 } 5784 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5785 /* check if array is null on the connected component */ 5786 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5787 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5788 if (real_value > 0.0) { /* keep indices and values */ 5789 temp_constraints++; 5790 total_counts++; 5791 if (!idxs_copied) { 5792 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5793 idxs_copied = PETSC_TRUE; 5794 } 5795 } 5796 } 5797 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5798 valid_constraints = temp_constraints; 5799 if (!pcbddc->use_nnsp_true && temp_constraints) { 5800 if (temp_constraints == 1) { /* just normalize the constraint */ 5801 PetscScalar norm,*ptr_to_data; 5802 5803 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5804 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5805 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5806 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5807 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5808 } else { /* perform SVD */ 5809 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5810 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5811 5812 #if defined(PETSC_MISSING_LAPACK_GESVD) 5813 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5814 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5815 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5816 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5817 from that computed using LAPACKgesvd 5818 -> This is due to a different computation of eigenvectors in LAPACKheev 5819 -> The quality of the POD-computed basis will be the same */ 5820 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5821 /* Store upper triangular part of correlation matrix */ 5822 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5823 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5824 for (j=0;j<temp_constraints;j++) { 5825 for (k=0;k<j+1;k++) { 5826 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5827 } 5828 } 5829 /* compute eigenvalues and eigenvectors of correlation matrix */ 5830 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5831 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5832 #if !defined(PETSC_USE_COMPLEX) 5833 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5834 #else 5835 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5836 #endif 5837 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5838 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5839 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5840 j = 0; 5841 while (j < temp_constraints && singular_vals[j] < tol) j++; 5842 total_counts = total_counts-j; 5843 valid_constraints = temp_constraints-j; 5844 /* scale and copy POD basis into used quadrature memory */ 5845 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5846 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5847 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5848 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5849 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5850 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5851 if (j<temp_constraints) { 5852 PetscInt ii; 5853 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5854 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5855 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5856 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5857 for (k=0;k<temp_constraints-j;k++) { 5858 for (ii=0;ii<size_of_constraint;ii++) { 5859 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5860 } 5861 } 5862 } 5863 #else /* on missing GESVD */ 5864 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5865 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5866 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5867 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5868 #if !defined(PETSC_USE_COMPLEX) 5869 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5870 #else 5871 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5872 #endif 5873 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5874 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5875 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5876 k = temp_constraints; 5877 if (k > size_of_constraint) k = size_of_constraint; 5878 j = 0; 5879 while (j < k && singular_vals[k-j-1] < tol) j++; 5880 valid_constraints = k-j; 5881 total_counts = total_counts-temp_constraints+valid_constraints; 5882 #endif /* on missing GESVD */ 5883 } 5884 } 5885 /* update pointers information */ 5886 if (valid_constraints) { 5887 constraints_n[total_counts_cc] = valid_constraints; 5888 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5889 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5890 /* set change_of_basis flag */ 5891 if (boolforchange) { 5892 PetscBTSet(change_basis,total_counts_cc); 5893 } 5894 total_counts_cc++; 5895 } 5896 } 5897 /* free workspace */ 5898 if (!skip_lapack) { 5899 ierr = PetscFree(work);CHKERRQ(ierr); 5900 #if defined(PETSC_USE_COMPLEX) 5901 ierr = PetscFree(rwork);CHKERRQ(ierr); 5902 #endif 5903 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5904 #if defined(PETSC_MISSING_LAPACK_GESVD) 5905 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5906 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5907 #endif 5908 } 5909 for (k=0;k<nnsp_size;k++) { 5910 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5911 } 5912 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5913 /* free index sets of faces, edges and vertices */ 5914 for (i=0;i<n_ISForFaces;i++) { 5915 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5916 } 5917 if (n_ISForFaces) { 5918 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5919 } 5920 for (i=0;i<n_ISForEdges;i++) { 5921 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5922 } 5923 if (n_ISForEdges) { 5924 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5925 } 5926 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5927 } else { 5928 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5929 5930 total_counts = 0; 5931 n_vertices = 0; 5932 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5933 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5934 } 5935 max_constraints = 0; 5936 total_counts_cc = 0; 5937 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5938 total_counts += pcbddc->adaptive_constraints_n[i]; 5939 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5940 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5941 } 5942 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5943 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5944 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5945 constraints_data = pcbddc->adaptive_constraints_data; 5946 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5947 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5948 total_counts_cc = 0; 5949 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5950 if (pcbddc->adaptive_constraints_n[i]) { 5951 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5952 } 5953 } 5954 #if 0 5955 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5956 for (i=0;i<total_counts_cc;i++) { 5957 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5958 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5959 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5960 printf(" %d",constraints_idxs[j]); 5961 } 5962 printf("\n"); 5963 printf("number of cc: %d\n",constraints_n[i]); 5964 } 5965 for (i=0;i<n_vertices;i++) { 5966 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5967 } 5968 for (i=0;i<sub_schurs->n_subs;i++) { 5969 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5970 } 5971 #endif 5972 5973 max_size_of_constraint = 0; 5974 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5975 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5976 /* Change of basis */ 5977 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5978 if (pcbddc->use_change_of_basis) { 5979 for (i=0;i<sub_schurs->n_subs;i++) { 5980 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5981 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5982 } 5983 } 5984 } 5985 } 5986 pcbddc->local_primal_size = total_counts; 5987 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5988 5989 /* map constraints_idxs in boundary numbering */ 5990 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5991 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5992 5993 /* Create constraint matrix */ 5994 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5995 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5996 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5997 5998 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5999 /* determine if a QR strategy is needed for change of basis */ 6000 qr_needed = PETSC_FALSE; 6001 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6002 total_primal_vertices=0; 6003 pcbddc->local_primal_size_cc = 0; 6004 for (i=0;i<total_counts_cc;i++) { 6005 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6006 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6007 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6008 pcbddc->local_primal_size_cc += 1; 6009 } else if (PetscBTLookup(change_basis,i)) { 6010 for (k=0;k<constraints_n[i];k++) { 6011 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6012 } 6013 pcbddc->local_primal_size_cc += constraints_n[i]; 6014 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6015 PetscBTSet(qr_needed_idx,i); 6016 qr_needed = PETSC_TRUE; 6017 } 6018 } else { 6019 pcbddc->local_primal_size_cc += 1; 6020 } 6021 } 6022 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6023 pcbddc->n_vertices = total_primal_vertices; 6024 /* permute indices in order to have a sorted set of vertices */ 6025 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6026 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6027 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6028 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6029 6030 /* nonzero structure of constraint matrix */ 6031 /* and get reference dof for local constraints */ 6032 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6033 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6034 6035 j = total_primal_vertices; 6036 total_counts = total_primal_vertices; 6037 cum = total_primal_vertices; 6038 for (i=n_vertices;i<total_counts_cc;i++) { 6039 if (!PetscBTLookup(change_basis,i)) { 6040 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6041 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6042 cum++; 6043 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6044 for (k=0;k<constraints_n[i];k++) { 6045 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6046 nnz[j+k] = size_of_constraint; 6047 } 6048 j += constraints_n[i]; 6049 } 6050 } 6051 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6052 ierr = PetscFree(nnz);CHKERRQ(ierr); 6053 6054 /* set values in constraint matrix */ 6055 for (i=0;i<total_primal_vertices;i++) { 6056 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6057 } 6058 total_counts = total_primal_vertices; 6059 for (i=n_vertices;i<total_counts_cc;i++) { 6060 if (!PetscBTLookup(change_basis,i)) { 6061 PetscInt *cols; 6062 6063 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6064 cols = constraints_idxs+constraints_idxs_ptr[i]; 6065 for (k=0;k<constraints_n[i];k++) { 6066 PetscInt row = total_counts+k; 6067 PetscScalar *vals; 6068 6069 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6070 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6071 } 6072 total_counts += constraints_n[i]; 6073 } 6074 } 6075 /* assembling */ 6076 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6077 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6078 6079 /* 6080 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6081 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6082 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6083 */ 6084 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6085 if (pcbddc->use_change_of_basis) { 6086 /* dual and primal dofs on a single cc */ 6087 PetscInt dual_dofs,primal_dofs; 6088 /* working stuff for GEQRF */ 6089 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6090 PetscBLASInt lqr_work; 6091 /* working stuff for UNGQR */ 6092 PetscScalar *gqr_work,lgqr_work_t; 6093 PetscBLASInt lgqr_work; 6094 /* working stuff for TRTRS */ 6095 PetscScalar *trs_rhs; 6096 PetscBLASInt Blas_NRHS; 6097 /* pointers for values insertion into change of basis matrix */ 6098 PetscInt *start_rows,*start_cols; 6099 PetscScalar *start_vals; 6100 /* working stuff for values insertion */ 6101 PetscBT is_primal; 6102 PetscInt *aux_primal_numbering_B; 6103 /* matrix sizes */ 6104 PetscInt global_size,local_size; 6105 /* temporary change of basis */ 6106 Mat localChangeOfBasisMatrix; 6107 /* extra space for debugging */ 6108 PetscScalar *dbg_work; 6109 6110 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6111 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6112 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6113 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6114 /* nonzeros for local mat */ 6115 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6116 if (!pcbddc->benign_change || pcbddc->fake_change) { 6117 for (i=0;i<pcis->n;i++) nnz[i]=1; 6118 } else { 6119 const PetscInt *ii; 6120 PetscInt n; 6121 PetscBool flg_row; 6122 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6123 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6124 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6125 } 6126 for (i=n_vertices;i<total_counts_cc;i++) { 6127 if (PetscBTLookup(change_basis,i)) { 6128 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6129 if (PetscBTLookup(qr_needed_idx,i)) { 6130 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6131 } else { 6132 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6133 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6134 } 6135 } 6136 } 6137 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6138 ierr = PetscFree(nnz);CHKERRQ(ierr); 6139 /* Set interior change in the matrix */ 6140 if (!pcbddc->benign_change || pcbddc->fake_change) { 6141 for (i=0;i<pcis->n;i++) { 6142 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6143 } 6144 } else { 6145 const PetscInt *ii,*jj; 6146 PetscScalar *aa; 6147 PetscInt n; 6148 PetscBool flg_row; 6149 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6150 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6151 for (i=0;i<n;i++) { 6152 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6153 } 6154 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6155 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6156 } 6157 6158 if (pcbddc->dbg_flag) { 6159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6160 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6161 } 6162 6163 6164 /* Now we loop on the constraints which need a change of basis */ 6165 /* 6166 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6167 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6168 6169 Basic blocks of change of basis matrix T computed by 6170 6171 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6172 6173 | 1 0 ... 0 s_1/S | 6174 | 0 1 ... 0 s_2/S | 6175 | ... | 6176 | 0 ... 1 s_{n-1}/S | 6177 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6178 6179 with S = \sum_{i=1}^n s_i^2 6180 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6181 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6182 6183 - QR decomposition of constraints otherwise 6184 */ 6185 if (qr_needed) { 6186 /* space to store Q */ 6187 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6188 /* array to store scaling factors for reflectors */ 6189 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6190 /* first we issue queries for optimal work */ 6191 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6192 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6193 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6194 lqr_work = -1; 6195 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6196 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6197 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6198 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6199 lgqr_work = -1; 6200 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6201 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6202 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6203 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6204 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6205 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6206 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6207 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6208 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6209 /* array to store rhs and solution of triangular solver */ 6210 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6211 /* allocating workspace for check */ 6212 if (pcbddc->dbg_flag) { 6213 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6214 } 6215 } 6216 /* array to store whether a node is primal or not */ 6217 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6218 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6219 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6220 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6221 for (i=0;i<total_primal_vertices;i++) { 6222 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6223 } 6224 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6225 6226 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6227 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6228 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6229 if (PetscBTLookup(change_basis,total_counts)) { 6230 /* get constraint info */ 6231 primal_dofs = constraints_n[total_counts]; 6232 dual_dofs = size_of_constraint-primal_dofs; 6233 6234 if (pcbddc->dbg_flag) { 6235 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6236 } 6237 6238 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6239 6240 /* copy quadrature constraints for change of basis check */ 6241 if (pcbddc->dbg_flag) { 6242 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6243 } 6244 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6245 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6246 6247 /* compute QR decomposition of constraints */ 6248 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6249 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6250 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6251 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6252 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6253 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6254 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6255 6256 /* explictly compute R^-T */ 6257 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6258 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6259 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6260 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6261 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6262 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6264 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6265 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6266 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6267 6268 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6269 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6270 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6271 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6272 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6273 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6274 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6275 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6276 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6277 6278 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6279 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6280 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6282 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6283 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6285 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6286 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6287 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6288 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6289 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6290 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6291 6292 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6293 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6294 /* insert cols for primal dofs */ 6295 for (j=0;j<primal_dofs;j++) { 6296 start_vals = &qr_basis[j*size_of_constraint]; 6297 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6298 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6299 } 6300 /* insert cols for dual dofs */ 6301 for (j=0,k=0;j<dual_dofs;k++) { 6302 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6303 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6304 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6305 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6306 j++; 6307 } 6308 } 6309 6310 /* check change of basis */ 6311 if (pcbddc->dbg_flag) { 6312 PetscInt ii,jj; 6313 PetscBool valid_qr=PETSC_TRUE; 6314 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6315 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6318 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6319 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6320 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6321 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6322 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6323 for (jj=0;jj<size_of_constraint;jj++) { 6324 for (ii=0;ii<primal_dofs;ii++) { 6325 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6326 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6327 } 6328 } 6329 if (!valid_qr) { 6330 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6331 for (jj=0;jj<size_of_constraint;jj++) { 6332 for (ii=0;ii<primal_dofs;ii++) { 6333 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6334 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6335 } 6336 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6337 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6338 } 6339 } 6340 } 6341 } else { 6342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6343 } 6344 } 6345 } else { /* simple transformation block */ 6346 PetscInt row,col; 6347 PetscScalar val,norm; 6348 6349 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6350 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6351 for (j=0;j<size_of_constraint;j++) { 6352 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6353 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6354 if (!PetscBTLookup(is_primal,row_B)) { 6355 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6356 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6357 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6358 } else { 6359 for (k=0;k<size_of_constraint;k++) { 6360 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6361 if (row != col) { 6362 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6363 } else { 6364 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6365 } 6366 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6367 } 6368 } 6369 } 6370 if (pcbddc->dbg_flag) { 6371 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6372 } 6373 } 6374 } else { 6375 if (pcbddc->dbg_flag) { 6376 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6377 } 6378 } 6379 } 6380 6381 /* free workspace */ 6382 if (qr_needed) { 6383 if (pcbddc->dbg_flag) { 6384 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6385 } 6386 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6387 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6388 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6389 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6390 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6391 } 6392 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6393 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6394 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6395 6396 /* assembling of global change of variable */ 6397 if (!pcbddc->fake_change) { 6398 Mat tmat; 6399 PetscInt bs; 6400 6401 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6402 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6403 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6404 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6405 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6406 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6407 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6408 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6409 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6410 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6411 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6412 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6413 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6414 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6415 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6416 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6417 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6418 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6419 6420 /* check */ 6421 if (pcbddc->dbg_flag) { 6422 PetscReal error; 6423 Vec x,x_change; 6424 6425 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6426 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6427 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6428 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6429 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6430 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6431 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6432 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6433 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6434 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6435 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6436 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6437 if (error > PETSC_SMALL) { 6438 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6439 } 6440 ierr = VecDestroy(&x);CHKERRQ(ierr); 6441 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6442 } 6443 /* adapt sub_schurs computed (if any) */ 6444 if (pcbddc->use_deluxe_scaling) { 6445 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6446 6447 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6448 if (sub_schurs && sub_schurs->S_Ej_all) { 6449 Mat S_new,tmat; 6450 IS is_all_N,is_V_Sall = NULL; 6451 6452 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6453 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6454 if (pcbddc->deluxe_zerorows) { 6455 ISLocalToGlobalMapping NtoSall; 6456 IS is_V; 6457 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6458 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6459 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6460 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6461 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6462 } 6463 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6464 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6465 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6466 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6467 if (pcbddc->deluxe_zerorows) { 6468 const PetscScalar *array; 6469 const PetscInt *idxs_V,*idxs_all; 6470 PetscInt i,n_V; 6471 6472 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6473 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6474 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6475 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6476 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6477 for (i=0;i<n_V;i++) { 6478 PetscScalar val; 6479 PetscInt idx; 6480 6481 idx = idxs_V[i]; 6482 val = array[idxs_all[idxs_V[i]]]; 6483 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6484 } 6485 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6486 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6487 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6488 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6489 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6490 } 6491 sub_schurs->S_Ej_all = S_new; 6492 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6493 if (sub_schurs->sum_S_Ej_all) { 6494 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6495 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6496 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6497 if (pcbddc->deluxe_zerorows) { 6498 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6499 } 6500 sub_schurs->sum_S_Ej_all = S_new; 6501 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6502 } 6503 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6504 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6505 } 6506 /* destroy any change of basis context in sub_schurs */ 6507 if (sub_schurs && sub_schurs->change) { 6508 PetscInt i; 6509 6510 for (i=0;i<sub_schurs->n_subs;i++) { 6511 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6512 } 6513 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6514 } 6515 } 6516 if (pcbddc->switch_static) { /* need to save the local change */ 6517 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6518 } else { 6519 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6520 } 6521 /* determine if any process has changed the pressures locally */ 6522 pcbddc->change_interior = pcbddc->benign_have_null; 6523 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6524 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6525 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6526 pcbddc->use_qr_single = qr_needed; 6527 } 6528 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6529 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6530 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6531 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6532 } else { 6533 Mat benign_global = NULL; 6534 if (pcbddc->benign_have_null) { 6535 Mat tmat; 6536 6537 pcbddc->change_interior = PETSC_TRUE; 6538 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6539 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6540 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6541 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6542 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6543 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6544 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6545 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6546 if (pcbddc->benign_change) { 6547 Mat M; 6548 6549 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6550 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6551 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6552 ierr = MatDestroy(&M);CHKERRQ(ierr); 6553 } else { 6554 Mat eye; 6555 PetscScalar *array; 6556 6557 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6558 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6559 for (i=0;i<pcis->n;i++) { 6560 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6561 } 6562 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6563 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6564 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6565 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6566 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6567 } 6568 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6569 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6570 } 6571 if (pcbddc->user_ChangeOfBasisMatrix) { 6572 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6573 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6574 } else if (pcbddc->benign_have_null) { 6575 pcbddc->ChangeOfBasisMatrix = benign_global; 6576 } 6577 } 6578 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6579 IS is_global; 6580 const PetscInt *gidxs; 6581 6582 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6583 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6584 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6585 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6586 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6587 } 6588 } 6589 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6590 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6591 } 6592 6593 if (!pcbddc->fake_change) { 6594 /* add pressure dofs to set of primal nodes for numbering purposes */ 6595 for (i=0;i<pcbddc->benign_n;i++) { 6596 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6597 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6598 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6599 pcbddc->local_primal_size_cc++; 6600 pcbddc->local_primal_size++; 6601 } 6602 6603 /* check if a new primal space has been introduced (also take into account benign trick) */ 6604 pcbddc->new_primal_space_local = PETSC_TRUE; 6605 if (olocal_primal_size == pcbddc->local_primal_size) { 6606 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6607 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6608 if (!pcbddc->new_primal_space_local) { 6609 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6610 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6611 } 6612 } 6613 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6614 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6615 } 6616 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6617 6618 /* flush dbg viewer */ 6619 if (pcbddc->dbg_flag) { 6620 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6621 } 6622 6623 /* free workspace */ 6624 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6625 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6626 if (!pcbddc->adaptive_selection) { 6627 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6628 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6629 } else { 6630 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6631 pcbddc->adaptive_constraints_idxs_ptr, 6632 pcbddc->adaptive_constraints_data_ptr, 6633 pcbddc->adaptive_constraints_idxs, 6634 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6635 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6636 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6637 } 6638 PetscFunctionReturn(0); 6639 } 6640 6641 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6642 { 6643 ISLocalToGlobalMapping map; 6644 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6645 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6646 PetscInt i,N; 6647 PetscBool rcsr = PETSC_FALSE; 6648 PetscErrorCode ierr; 6649 6650 PetscFunctionBegin; 6651 if (pcbddc->recompute_topography) { 6652 pcbddc->graphanalyzed = PETSC_FALSE; 6653 /* Reset previously computed graph */ 6654 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6655 /* Init local Graph struct */ 6656 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6657 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6658 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6659 6660 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6661 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6662 } 6663 /* Check validity of the csr graph passed in by the user */ 6664 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6665 6666 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6667 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6668 PetscInt *xadj,*adjncy; 6669 PetscInt nvtxs; 6670 PetscBool flg_row=PETSC_FALSE; 6671 6672 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6673 if (flg_row) { 6674 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6675 pcbddc->computed_rowadj = PETSC_TRUE; 6676 } 6677 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6678 rcsr = PETSC_TRUE; 6679 } 6680 if (pcbddc->dbg_flag) { 6681 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6682 } 6683 6684 /* Setup of Graph */ 6685 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6686 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6687 6688 /* attach info on disconnected subdomains if present */ 6689 if (pcbddc->n_local_subs) { 6690 PetscInt *local_subs; 6691 6692 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6693 for (i=0;i<pcbddc->n_local_subs;i++) { 6694 const PetscInt *idxs; 6695 PetscInt nl,j; 6696 6697 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6698 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6699 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6700 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6701 } 6702 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6703 pcbddc->mat_graph->local_subs = local_subs; 6704 } 6705 } 6706 6707 if (!pcbddc->graphanalyzed) { 6708 /* Graph's connected components analysis */ 6709 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6710 pcbddc->graphanalyzed = PETSC_TRUE; 6711 } 6712 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6713 PetscFunctionReturn(0); 6714 } 6715 6716 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6717 { 6718 PetscInt i,j; 6719 PetscScalar *alphas; 6720 PetscErrorCode ierr; 6721 6722 PetscFunctionBegin; 6723 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6724 for (i=0;i<n;i++) { 6725 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6726 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6727 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6728 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6729 } 6730 ierr = PetscFree(alphas);CHKERRQ(ierr); 6731 PetscFunctionReturn(0); 6732 } 6733 6734 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6735 { 6736 Mat A; 6737 PetscInt n_neighs,*neighs,*n_shared,**shared; 6738 PetscMPIInt size,rank,color; 6739 PetscInt *xadj,*adjncy; 6740 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6741 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6742 PetscInt void_procs,*procs_candidates = NULL; 6743 PetscInt xadj_count,*count; 6744 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6745 PetscSubcomm psubcomm; 6746 MPI_Comm subcomm; 6747 PetscErrorCode ierr; 6748 6749 PetscFunctionBegin; 6750 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6751 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6752 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6753 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6754 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6755 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6756 6757 if (have_void) *have_void = PETSC_FALSE; 6758 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6759 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6760 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6761 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6762 im_active = !!n; 6763 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6764 void_procs = size - active_procs; 6765 /* get ranks of of non-active processes in mat communicator */ 6766 if (void_procs) { 6767 PetscInt ncand; 6768 6769 if (have_void) *have_void = PETSC_TRUE; 6770 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6771 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6772 for (i=0,ncand=0;i<size;i++) { 6773 if (!procs_candidates[i]) { 6774 procs_candidates[ncand++] = i; 6775 } 6776 } 6777 /* force n_subdomains to be not greater that the number of non-active processes */ 6778 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6779 } 6780 6781 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6782 number of subdomains requested 1 -> send to master or first candidate in voids */ 6783 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6784 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6785 PetscInt issize,isidx,dest; 6786 if (*n_subdomains == 1) dest = 0; 6787 else dest = rank; 6788 if (im_active) { 6789 issize = 1; 6790 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6791 isidx = procs_candidates[dest]; 6792 } else { 6793 isidx = dest; 6794 } 6795 } else { 6796 issize = 0; 6797 isidx = -1; 6798 } 6799 if (*n_subdomains != 1) *n_subdomains = active_procs; 6800 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6801 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6802 PetscFunctionReturn(0); 6803 } 6804 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6805 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6806 threshold = PetscMax(threshold,2); 6807 6808 /* Get info on mapping */ 6809 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6810 6811 /* build local CSR graph of subdomains' connectivity */ 6812 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6813 xadj[0] = 0; 6814 xadj[1] = PetscMax(n_neighs-1,0); 6815 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6816 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6817 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6818 for (i=1;i<n_neighs;i++) 6819 for (j=0;j<n_shared[i];j++) 6820 count[shared[i][j]] += 1; 6821 6822 xadj_count = 0; 6823 for (i=1;i<n_neighs;i++) { 6824 for (j=0;j<n_shared[i];j++) { 6825 if (count[shared[i][j]] < threshold) { 6826 adjncy[xadj_count] = neighs[i]; 6827 adjncy_wgt[xadj_count] = n_shared[i]; 6828 xadj_count++; 6829 break; 6830 } 6831 } 6832 } 6833 xadj[1] = xadj_count; 6834 ierr = PetscFree(count);CHKERRQ(ierr); 6835 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6836 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6837 6838 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6839 6840 /* Restrict work on active processes only */ 6841 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6842 if (void_procs) { 6843 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6844 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6845 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6846 subcomm = PetscSubcommChild(psubcomm); 6847 } else { 6848 psubcomm = NULL; 6849 subcomm = PetscObjectComm((PetscObject)mat); 6850 } 6851 6852 v_wgt = NULL; 6853 if (!color) { 6854 ierr = PetscFree(xadj);CHKERRQ(ierr); 6855 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6856 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6857 } else { 6858 Mat subdomain_adj; 6859 IS new_ranks,new_ranks_contig; 6860 MatPartitioning partitioner; 6861 PetscInt rstart=0,rend=0; 6862 PetscInt *is_indices,*oldranks; 6863 PetscMPIInt size; 6864 PetscBool aggregate; 6865 6866 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6867 if (void_procs) { 6868 PetscInt prank = rank; 6869 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6870 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6871 for (i=0;i<xadj[1];i++) { 6872 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6873 } 6874 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6875 } else { 6876 oldranks = NULL; 6877 } 6878 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6879 if (aggregate) { /* TODO: all this part could be made more efficient */ 6880 PetscInt lrows,row,ncols,*cols; 6881 PetscMPIInt nrank; 6882 PetscScalar *vals; 6883 6884 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6885 lrows = 0; 6886 if (nrank<redprocs) { 6887 lrows = size/redprocs; 6888 if (nrank<size%redprocs) lrows++; 6889 } 6890 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6891 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6892 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6893 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6894 row = nrank; 6895 ncols = xadj[1]-xadj[0]; 6896 cols = adjncy; 6897 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6898 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6899 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6900 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6901 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6902 ierr = PetscFree(xadj);CHKERRQ(ierr); 6903 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6904 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6905 ierr = PetscFree(vals);CHKERRQ(ierr); 6906 if (use_vwgt) { 6907 Vec v; 6908 const PetscScalar *array; 6909 PetscInt nl; 6910 6911 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6912 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6913 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6914 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6915 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6916 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6917 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6918 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6919 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6920 ierr = VecDestroy(&v);CHKERRQ(ierr); 6921 } 6922 } else { 6923 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6924 if (use_vwgt) { 6925 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6926 v_wgt[0] = n; 6927 } 6928 } 6929 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6930 6931 /* Partition */ 6932 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6933 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6934 if (v_wgt) { 6935 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6936 } 6937 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6938 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6939 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6940 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6941 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6942 6943 /* renumber new_ranks to avoid "holes" in new set of processors */ 6944 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6945 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6946 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6947 if (!aggregate) { 6948 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6949 #if defined(PETSC_USE_DEBUG) 6950 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6951 #endif 6952 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6953 } else if (oldranks) { 6954 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6955 } else { 6956 ranks_send_to_idx[0] = is_indices[0]; 6957 } 6958 } else { 6959 PetscInt idx = 0; 6960 PetscMPIInt tag; 6961 MPI_Request *reqs; 6962 6963 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6964 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6965 for (i=rstart;i<rend;i++) { 6966 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6967 } 6968 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6969 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6970 ierr = PetscFree(reqs);CHKERRQ(ierr); 6971 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6972 #if defined(PETSC_USE_DEBUG) 6973 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6974 #endif 6975 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6976 } else if (oldranks) { 6977 ranks_send_to_idx[0] = oldranks[idx]; 6978 } else { 6979 ranks_send_to_idx[0] = idx; 6980 } 6981 } 6982 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6983 /* clean up */ 6984 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6985 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6986 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6987 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6988 } 6989 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6990 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6991 6992 /* assemble parallel IS for sends */ 6993 i = 1; 6994 if (!color) i=0; 6995 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6996 PetscFunctionReturn(0); 6997 } 6998 6999 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7000 7001 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7002 { 7003 Mat local_mat; 7004 IS is_sends_internal; 7005 PetscInt rows,cols,new_local_rows; 7006 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7007 PetscBool ismatis,isdense,newisdense,destroy_mat; 7008 ISLocalToGlobalMapping l2gmap; 7009 PetscInt* l2gmap_indices; 7010 const PetscInt* is_indices; 7011 MatType new_local_type; 7012 /* buffers */ 7013 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7014 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7015 PetscInt *recv_buffer_idxs_local; 7016 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7017 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7018 /* MPI */ 7019 MPI_Comm comm,comm_n; 7020 PetscSubcomm subcomm; 7021 PetscMPIInt n_sends,n_recvs,commsize; 7022 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7023 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7024 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7025 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7026 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7027 PetscErrorCode ierr; 7028 7029 PetscFunctionBegin; 7030 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7031 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7032 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7033 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7034 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7035 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7036 PetscValidLogicalCollectiveBool(mat,reuse,6); 7037 PetscValidLogicalCollectiveInt(mat,nis,8); 7038 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7039 if (nvecs) { 7040 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7041 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7042 } 7043 /* further checks */ 7044 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7045 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7046 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7047 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7048 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7049 if (reuse && *mat_n) { 7050 PetscInt mrows,mcols,mnrows,mncols; 7051 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7052 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7053 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7054 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7055 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7056 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7057 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7058 } 7059 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7060 PetscValidLogicalCollectiveInt(mat,bs,0); 7061 7062 /* prepare IS for sending if not provided */ 7063 if (!is_sends) { 7064 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7065 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7066 } else { 7067 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7068 is_sends_internal = is_sends; 7069 } 7070 7071 /* get comm */ 7072 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7073 7074 /* compute number of sends */ 7075 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7076 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7077 7078 /* compute number of receives */ 7079 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7080 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7081 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7082 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7083 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7084 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7085 ierr = PetscFree(iflags);CHKERRQ(ierr); 7086 7087 /* restrict comm if requested */ 7088 subcomm = 0; 7089 destroy_mat = PETSC_FALSE; 7090 if (restrict_comm) { 7091 PetscMPIInt color,subcommsize; 7092 7093 color = 0; 7094 if (restrict_full) { 7095 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7096 } else { 7097 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7098 } 7099 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7100 subcommsize = commsize - subcommsize; 7101 /* check if reuse has been requested */ 7102 if (reuse) { 7103 if (*mat_n) { 7104 PetscMPIInt subcommsize2; 7105 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7106 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7107 comm_n = PetscObjectComm((PetscObject)*mat_n); 7108 } else { 7109 comm_n = PETSC_COMM_SELF; 7110 } 7111 } else { /* MAT_INITIAL_MATRIX */ 7112 PetscMPIInt rank; 7113 7114 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7115 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7116 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7117 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7118 comm_n = PetscSubcommChild(subcomm); 7119 } 7120 /* flag to destroy *mat_n if not significative */ 7121 if (color) destroy_mat = PETSC_TRUE; 7122 } else { 7123 comm_n = comm; 7124 } 7125 7126 /* prepare send/receive buffers */ 7127 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7128 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7129 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7130 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7131 if (nis) { 7132 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7133 } 7134 7135 /* Get data from local matrices */ 7136 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7137 /* TODO: See below some guidelines on how to prepare the local buffers */ 7138 /* 7139 send_buffer_vals should contain the raw values of the local matrix 7140 send_buffer_idxs should contain: 7141 - MatType_PRIVATE type 7142 - PetscInt size_of_l2gmap 7143 - PetscInt global_row_indices[size_of_l2gmap] 7144 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7145 */ 7146 else { 7147 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7148 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7149 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7150 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7151 send_buffer_idxs[1] = i; 7152 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7153 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7154 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7155 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7156 for (i=0;i<n_sends;i++) { 7157 ilengths_vals[is_indices[i]] = len*len; 7158 ilengths_idxs[is_indices[i]] = len+2; 7159 } 7160 } 7161 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7162 /* additional is (if any) */ 7163 if (nis) { 7164 PetscMPIInt psum; 7165 PetscInt j; 7166 for (j=0,psum=0;j<nis;j++) { 7167 PetscInt plen; 7168 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7169 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7170 psum += len+1; /* indices + lenght */ 7171 } 7172 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7173 for (j=0,psum=0;j<nis;j++) { 7174 PetscInt plen; 7175 const PetscInt *is_array_idxs; 7176 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7177 send_buffer_idxs_is[psum] = plen; 7178 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7179 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7180 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7181 psum += plen+1; /* indices + lenght */ 7182 } 7183 for (i=0;i<n_sends;i++) { 7184 ilengths_idxs_is[is_indices[i]] = psum; 7185 } 7186 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7187 } 7188 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7189 7190 buf_size_idxs = 0; 7191 buf_size_vals = 0; 7192 buf_size_idxs_is = 0; 7193 buf_size_vecs = 0; 7194 for (i=0;i<n_recvs;i++) { 7195 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7196 buf_size_vals += (PetscInt)olengths_vals[i]; 7197 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7198 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7199 } 7200 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7201 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7202 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7203 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7204 7205 /* get new tags for clean communications */ 7206 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7207 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7208 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7209 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7210 7211 /* allocate for requests */ 7212 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7213 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7214 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7215 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7216 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7217 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7218 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7219 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7220 7221 /* communications */ 7222 ptr_idxs = recv_buffer_idxs; 7223 ptr_vals = recv_buffer_vals; 7224 ptr_idxs_is = recv_buffer_idxs_is; 7225 ptr_vecs = recv_buffer_vecs; 7226 for (i=0;i<n_recvs;i++) { 7227 source_dest = onodes[i]; 7228 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7229 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7230 ptr_idxs += olengths_idxs[i]; 7231 ptr_vals += olengths_vals[i]; 7232 if (nis) { 7233 source_dest = onodes_is[i]; 7234 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7235 ptr_idxs_is += olengths_idxs_is[i]; 7236 } 7237 if (nvecs) { 7238 source_dest = onodes[i]; 7239 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7240 ptr_vecs += olengths_idxs[i]-2; 7241 } 7242 } 7243 for (i=0;i<n_sends;i++) { 7244 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7245 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7246 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7247 if (nis) { 7248 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7249 } 7250 if (nvecs) { 7251 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7252 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7253 } 7254 } 7255 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7256 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7257 7258 /* assemble new l2g map */ 7259 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7260 ptr_idxs = recv_buffer_idxs; 7261 new_local_rows = 0; 7262 for (i=0;i<n_recvs;i++) { 7263 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7264 ptr_idxs += olengths_idxs[i]; 7265 } 7266 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7267 ptr_idxs = recv_buffer_idxs; 7268 new_local_rows = 0; 7269 for (i=0;i<n_recvs;i++) { 7270 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7271 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7272 ptr_idxs += olengths_idxs[i]; 7273 } 7274 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7275 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7276 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7277 7278 /* infer new local matrix type from received local matrices type */ 7279 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7280 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7281 if (n_recvs) { 7282 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7283 ptr_idxs = recv_buffer_idxs; 7284 for (i=0;i<n_recvs;i++) { 7285 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7286 new_local_type_private = MATAIJ_PRIVATE; 7287 break; 7288 } 7289 ptr_idxs += olengths_idxs[i]; 7290 } 7291 switch (new_local_type_private) { 7292 case MATDENSE_PRIVATE: 7293 new_local_type = MATSEQAIJ; 7294 bs = 1; 7295 break; 7296 case MATAIJ_PRIVATE: 7297 new_local_type = MATSEQAIJ; 7298 bs = 1; 7299 break; 7300 case MATBAIJ_PRIVATE: 7301 new_local_type = MATSEQBAIJ; 7302 break; 7303 case MATSBAIJ_PRIVATE: 7304 new_local_type = MATSEQSBAIJ; 7305 break; 7306 default: 7307 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7308 break; 7309 } 7310 } else { /* by default, new_local_type is seqaij */ 7311 new_local_type = MATSEQAIJ; 7312 bs = 1; 7313 } 7314 7315 /* create MATIS object if needed */ 7316 if (!reuse) { 7317 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7318 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7319 } else { 7320 /* it also destroys the local matrices */ 7321 if (*mat_n) { 7322 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7323 } else { /* this is a fake object */ 7324 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7325 } 7326 } 7327 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7328 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7329 7330 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7331 7332 /* Global to local map of received indices */ 7333 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7334 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7335 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7336 7337 /* restore attributes -> type of incoming data and its size */ 7338 buf_size_idxs = 0; 7339 for (i=0;i<n_recvs;i++) { 7340 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7341 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7342 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7343 } 7344 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7345 7346 /* set preallocation */ 7347 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7348 if (!newisdense) { 7349 PetscInt *new_local_nnz=0; 7350 7351 ptr_idxs = recv_buffer_idxs_local; 7352 if (n_recvs) { 7353 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7354 } 7355 for (i=0;i<n_recvs;i++) { 7356 PetscInt j; 7357 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7358 for (j=0;j<*(ptr_idxs+1);j++) { 7359 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7360 } 7361 } else { 7362 /* TODO */ 7363 } 7364 ptr_idxs += olengths_idxs[i]; 7365 } 7366 if (new_local_nnz) { 7367 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7368 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7369 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7370 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7371 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7372 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7373 } else { 7374 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7375 } 7376 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7377 } else { 7378 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7379 } 7380 7381 /* set values */ 7382 ptr_vals = recv_buffer_vals; 7383 ptr_idxs = recv_buffer_idxs_local; 7384 for (i=0;i<n_recvs;i++) { 7385 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7386 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7387 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7388 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7389 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7390 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7391 } else { 7392 /* TODO */ 7393 } 7394 ptr_idxs += olengths_idxs[i]; 7395 ptr_vals += olengths_vals[i]; 7396 } 7397 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7398 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7399 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7400 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7401 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7402 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7403 7404 #if 0 7405 if (!restrict_comm) { /* check */ 7406 Vec lvec,rvec; 7407 PetscReal infty_error; 7408 7409 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7410 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7411 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7412 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7413 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7414 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7415 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7416 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7417 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7418 } 7419 #endif 7420 7421 /* assemble new additional is (if any) */ 7422 if (nis) { 7423 PetscInt **temp_idxs,*count_is,j,psum; 7424 7425 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7426 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7427 ptr_idxs = recv_buffer_idxs_is; 7428 psum = 0; 7429 for (i=0;i<n_recvs;i++) { 7430 for (j=0;j<nis;j++) { 7431 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7432 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7433 psum += plen; 7434 ptr_idxs += plen+1; /* shift pointer to received data */ 7435 } 7436 } 7437 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7438 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7439 for (i=1;i<nis;i++) { 7440 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7441 } 7442 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7443 ptr_idxs = recv_buffer_idxs_is; 7444 for (i=0;i<n_recvs;i++) { 7445 for (j=0;j<nis;j++) { 7446 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7447 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7448 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7449 ptr_idxs += plen+1; /* shift pointer to received data */ 7450 } 7451 } 7452 for (i=0;i<nis;i++) { 7453 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7454 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7455 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7456 } 7457 ierr = PetscFree(count_is);CHKERRQ(ierr); 7458 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7459 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7460 } 7461 /* free workspace */ 7462 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7463 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7464 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7465 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7466 if (isdense) { 7467 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7468 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7469 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7470 } else { 7471 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7472 } 7473 if (nis) { 7474 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7475 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7476 } 7477 7478 if (nvecs) { 7479 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7480 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7481 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7482 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7483 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7484 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7485 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7486 /* set values */ 7487 ptr_vals = recv_buffer_vecs; 7488 ptr_idxs = recv_buffer_idxs_local; 7489 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7490 for (i=0;i<n_recvs;i++) { 7491 PetscInt j; 7492 for (j=0;j<*(ptr_idxs+1);j++) { 7493 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7494 } 7495 ptr_idxs += olengths_idxs[i]; 7496 ptr_vals += olengths_idxs[i]-2; 7497 } 7498 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7499 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7500 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7501 } 7502 7503 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7504 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7505 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7506 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7507 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7508 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7509 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7510 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7511 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7512 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7513 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7514 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7515 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7516 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7517 ierr = PetscFree(onodes);CHKERRQ(ierr); 7518 if (nis) { 7519 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7520 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7521 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7522 } 7523 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7524 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7525 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7526 for (i=0;i<nis;i++) { 7527 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7528 } 7529 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7530 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7531 } 7532 *mat_n = NULL; 7533 } 7534 PetscFunctionReturn(0); 7535 } 7536 7537 /* temporary hack into ksp private data structure */ 7538 #include <petsc/private/kspimpl.h> 7539 7540 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7541 { 7542 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7543 PC_IS *pcis = (PC_IS*)pc->data; 7544 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7545 Mat coarsedivudotp = NULL; 7546 Mat coarseG,t_coarse_mat_is; 7547 MatNullSpace CoarseNullSpace = NULL; 7548 ISLocalToGlobalMapping coarse_islg; 7549 IS coarse_is,*isarray; 7550 PetscInt i,im_active=-1,active_procs=-1; 7551 PetscInt nis,nisdofs,nisneu,nisvert; 7552 PC pc_temp; 7553 PCType coarse_pc_type; 7554 KSPType coarse_ksp_type; 7555 PetscBool multilevel_requested,multilevel_allowed; 7556 PetscBool coarse_reuse; 7557 PetscInt ncoarse,nedcfield; 7558 PetscBool compute_vecs = PETSC_FALSE; 7559 PetscScalar *array; 7560 MatReuse coarse_mat_reuse; 7561 PetscBool restr, full_restr, have_void; 7562 PetscMPIInt commsize; 7563 PetscErrorCode ierr; 7564 7565 PetscFunctionBegin; 7566 /* Assign global numbering to coarse dofs */ 7567 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7568 PetscInt ocoarse_size; 7569 compute_vecs = PETSC_TRUE; 7570 7571 pcbddc->new_primal_space = PETSC_TRUE; 7572 ocoarse_size = pcbddc->coarse_size; 7573 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7574 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7575 /* see if we can avoid some work */ 7576 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7577 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7578 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7579 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7580 coarse_reuse = PETSC_FALSE; 7581 } else { /* we can safely reuse already computed coarse matrix */ 7582 coarse_reuse = PETSC_TRUE; 7583 } 7584 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7585 coarse_reuse = PETSC_FALSE; 7586 } 7587 /* reset any subassembling information */ 7588 if (!coarse_reuse || pcbddc->recompute_topography) { 7589 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7590 } 7591 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7592 coarse_reuse = PETSC_TRUE; 7593 } 7594 /* assemble coarse matrix */ 7595 if (coarse_reuse && pcbddc->coarse_ksp) { 7596 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7597 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7598 coarse_mat_reuse = MAT_REUSE_MATRIX; 7599 } else { 7600 coarse_mat = NULL; 7601 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7602 } 7603 7604 /* creates temporary l2gmap and IS for coarse indexes */ 7605 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7606 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7607 7608 /* creates temporary MATIS object for coarse matrix */ 7609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7610 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7611 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7612 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7613 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7614 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7615 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7616 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7617 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7618 7619 /* count "active" (i.e. with positive local size) and "void" processes */ 7620 im_active = !!(pcis->n); 7621 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7622 7623 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7624 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7625 /* full_restr : just use the receivers from the subassembling pattern */ 7626 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7627 coarse_mat_is = NULL; 7628 multilevel_allowed = PETSC_FALSE; 7629 multilevel_requested = PETSC_FALSE; 7630 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7631 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7632 if (multilevel_requested) { 7633 ncoarse = active_procs/pcbddc->coarsening_ratio; 7634 restr = PETSC_FALSE; 7635 full_restr = PETSC_FALSE; 7636 } else { 7637 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7638 restr = PETSC_TRUE; 7639 full_restr = PETSC_TRUE; 7640 } 7641 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7642 ncoarse = PetscMax(1,ncoarse); 7643 if (!pcbddc->coarse_subassembling) { 7644 if (pcbddc->coarsening_ratio > 1) { 7645 if (multilevel_requested) { 7646 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7647 } else { 7648 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7649 } 7650 } else { 7651 PetscMPIInt rank; 7652 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7653 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7654 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7655 } 7656 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7657 PetscInt psum; 7658 if (pcbddc->coarse_ksp) psum = 1; 7659 else psum = 0; 7660 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7661 if (ncoarse < commsize) have_void = PETSC_TRUE; 7662 } 7663 /* determine if we can go multilevel */ 7664 if (multilevel_requested) { 7665 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7666 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7667 } 7668 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7669 7670 /* dump subassembling pattern */ 7671 if (pcbddc->dbg_flag && multilevel_allowed) { 7672 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7673 } 7674 7675 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7676 nedcfield = -1; 7677 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7678 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7679 const PetscInt *idxs; 7680 ISLocalToGlobalMapping tmap; 7681 7682 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7683 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7684 /* allocate space for temporary storage */ 7685 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7686 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7687 /* allocate for IS array */ 7688 nisdofs = pcbddc->n_ISForDofsLocal; 7689 if (pcbddc->nedclocal) { 7690 if (pcbddc->nedfield > -1) { 7691 nedcfield = pcbddc->nedfield; 7692 } else { 7693 nedcfield = 0; 7694 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7695 nisdofs = 1; 7696 } 7697 } 7698 nisneu = !!pcbddc->NeumannBoundariesLocal; 7699 nisvert = 0; /* nisvert is not used */ 7700 nis = nisdofs + nisneu + nisvert; 7701 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7702 /* dofs splitting */ 7703 for (i=0;i<nisdofs;i++) { 7704 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7705 if (nedcfield != i) { 7706 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7707 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7708 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7709 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7710 } else { 7711 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7712 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7713 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7714 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7715 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7716 } 7717 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7718 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7719 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7720 } 7721 /* neumann boundaries */ 7722 if (pcbddc->NeumannBoundariesLocal) { 7723 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7724 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7725 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7726 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7727 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7728 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7729 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7730 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7731 } 7732 /* free memory */ 7733 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7734 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7735 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7736 } else { 7737 nis = 0; 7738 nisdofs = 0; 7739 nisneu = 0; 7740 nisvert = 0; 7741 isarray = NULL; 7742 } 7743 /* destroy no longer needed map */ 7744 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7745 7746 /* subassemble */ 7747 if (multilevel_allowed) { 7748 Vec vp[1]; 7749 PetscInt nvecs = 0; 7750 PetscBool reuse,reuser; 7751 7752 if (coarse_mat) reuse = PETSC_TRUE; 7753 else reuse = PETSC_FALSE; 7754 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7755 vp[0] = NULL; 7756 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7757 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7758 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7759 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7760 nvecs = 1; 7761 7762 if (pcbddc->divudotp) { 7763 Mat B,loc_divudotp; 7764 Vec v,p; 7765 IS dummy; 7766 PetscInt np; 7767 7768 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7769 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7770 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7771 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7772 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7773 ierr = VecSet(p,1.);CHKERRQ(ierr); 7774 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7775 ierr = VecDestroy(&p);CHKERRQ(ierr); 7776 ierr = MatDestroy(&B);CHKERRQ(ierr); 7777 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7778 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7779 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7780 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7781 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7782 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7783 ierr = VecDestroy(&v);CHKERRQ(ierr); 7784 } 7785 } 7786 if (reuser) { 7787 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7788 } else { 7789 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7790 } 7791 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7792 PetscScalar *arraym,*arrayv; 7793 PetscInt nl; 7794 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7795 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7796 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7797 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7798 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7799 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7800 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7801 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7802 } else { 7803 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7804 } 7805 } else { 7806 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7807 } 7808 if (coarse_mat_is || coarse_mat) { 7809 PetscMPIInt size; 7810 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7811 if (!multilevel_allowed) { 7812 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7813 } else { 7814 Mat A; 7815 7816 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7817 if (coarse_mat_is) { 7818 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7819 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7820 coarse_mat = coarse_mat_is; 7821 } 7822 /* be sure we don't have MatSeqDENSE as local mat */ 7823 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7824 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7825 } 7826 } 7827 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7828 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7829 7830 /* create local to global scatters for coarse problem */ 7831 if (compute_vecs) { 7832 PetscInt lrows; 7833 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7834 if (coarse_mat) { 7835 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7836 } else { 7837 lrows = 0; 7838 } 7839 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7840 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7841 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7842 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7843 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7844 } 7845 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7846 7847 /* set defaults for coarse KSP and PC */ 7848 if (multilevel_allowed) { 7849 coarse_ksp_type = KSPRICHARDSON; 7850 coarse_pc_type = PCBDDC; 7851 } else { 7852 coarse_ksp_type = KSPPREONLY; 7853 coarse_pc_type = PCREDUNDANT; 7854 } 7855 7856 /* print some info if requested */ 7857 if (pcbddc->dbg_flag) { 7858 if (!multilevel_allowed) { 7859 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7860 if (multilevel_requested) { 7861 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7862 } else if (pcbddc->max_levels) { 7863 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7864 } 7865 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7866 } 7867 } 7868 7869 /* communicate coarse discrete gradient */ 7870 coarseG = NULL; 7871 if (pcbddc->nedcG && multilevel_allowed) { 7872 MPI_Comm ccomm; 7873 if (coarse_mat) { 7874 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7875 } else { 7876 ccomm = MPI_COMM_NULL; 7877 } 7878 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7879 } 7880 7881 /* create the coarse KSP object only once with defaults */ 7882 if (coarse_mat) { 7883 PetscBool isredundant,isnn,isbddc; 7884 PetscViewer dbg_viewer = NULL; 7885 7886 if (pcbddc->dbg_flag) { 7887 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7888 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7889 } 7890 if (!pcbddc->coarse_ksp) { 7891 char prefix[256],str_level[16]; 7892 size_t len; 7893 7894 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7895 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7896 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7897 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7898 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7899 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7900 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7901 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7902 /* TODO is this logic correct? should check for coarse_mat type */ 7903 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7904 /* prefix */ 7905 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7906 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7907 if (!pcbddc->current_level) { 7908 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7909 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7910 } else { 7911 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7912 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7913 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7914 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7915 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7916 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7917 } 7918 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7919 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7920 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7921 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7922 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7923 /* allow user customization */ 7924 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7925 } 7926 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7927 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7928 if (nisdofs) { 7929 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7930 for (i=0;i<nisdofs;i++) { 7931 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7932 } 7933 } 7934 if (nisneu) { 7935 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7936 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7937 } 7938 if (nisvert) { 7939 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7940 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7941 } 7942 if (coarseG) { 7943 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7944 } 7945 7946 /* get some info after set from options */ 7947 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7948 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7949 if (isbddc && !multilevel_allowed) { 7950 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7951 isbddc = PETSC_FALSE; 7952 } 7953 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7954 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7955 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7956 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7957 isbddc = PETSC_TRUE; 7958 } 7959 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7960 if (isredundant) { 7961 KSP inner_ksp; 7962 PC inner_pc; 7963 7964 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7965 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7966 } 7967 7968 /* parameters which miss an API */ 7969 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7970 if (isbddc) { 7971 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7972 7973 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7974 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7975 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7976 if (pcbddc_coarse->benign_saddle_point) { 7977 Mat coarsedivudotp_is; 7978 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7979 IS row,col; 7980 const PetscInt *gidxs; 7981 PetscInt n,st,M,N; 7982 7983 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7984 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7985 st = st-n; 7986 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7987 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7988 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7989 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7990 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7991 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7992 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7993 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7994 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7995 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7996 ierr = ISDestroy(&row);CHKERRQ(ierr); 7997 ierr = ISDestroy(&col);CHKERRQ(ierr); 7998 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7999 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8000 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8001 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8002 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8003 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8004 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8005 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8006 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8007 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8008 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8009 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8010 } 8011 } 8012 8013 /* propagate symmetry info of coarse matrix */ 8014 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8015 if (pc->pmat->symmetric_set) { 8016 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8017 } 8018 if (pc->pmat->hermitian_set) { 8019 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8020 } 8021 if (pc->pmat->spd_set) { 8022 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8023 } 8024 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8025 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8026 } 8027 /* set operators */ 8028 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8029 if (pcbddc->dbg_flag) { 8030 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8031 } 8032 } 8033 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8034 ierr = PetscFree(isarray);CHKERRQ(ierr); 8035 #if 0 8036 { 8037 PetscViewer viewer; 8038 char filename[256]; 8039 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8040 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8041 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8042 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8043 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8044 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8045 } 8046 #endif 8047 8048 if (pcbddc->coarse_ksp) { 8049 Vec crhs,csol; 8050 8051 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8052 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8053 if (!csol) { 8054 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8055 } 8056 if (!crhs) { 8057 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8058 } 8059 } 8060 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8061 8062 /* compute null space for coarse solver if the benign trick has been requested */ 8063 if (pcbddc->benign_null) { 8064 8065 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8066 for (i=0;i<pcbddc->benign_n;i++) { 8067 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8068 } 8069 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8070 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8071 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8072 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8073 if (coarse_mat) { 8074 Vec nullv; 8075 PetscScalar *array,*array2; 8076 PetscInt nl; 8077 8078 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8079 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8080 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8081 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8082 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8083 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8084 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8085 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8086 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8087 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8088 } 8089 } 8090 8091 if (pcbddc->coarse_ksp) { 8092 PetscBool ispreonly; 8093 8094 if (CoarseNullSpace) { 8095 PetscBool isnull; 8096 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8097 if (isnull) { 8098 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8099 } 8100 /* TODO: add local nullspaces (if any) */ 8101 } 8102 /* setup coarse ksp */ 8103 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8104 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8105 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8106 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8107 KSP check_ksp; 8108 KSPType check_ksp_type; 8109 PC check_pc; 8110 Vec check_vec,coarse_vec; 8111 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8112 PetscInt its; 8113 PetscBool compute_eigs; 8114 PetscReal *eigs_r,*eigs_c; 8115 PetscInt neigs; 8116 const char *prefix; 8117 8118 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8119 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8120 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8121 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8122 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8123 /* prevent from setup unneeded object */ 8124 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8125 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8126 if (ispreonly) { 8127 check_ksp_type = KSPPREONLY; 8128 compute_eigs = PETSC_FALSE; 8129 } else { 8130 check_ksp_type = KSPGMRES; 8131 compute_eigs = PETSC_TRUE; 8132 } 8133 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8134 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8135 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8136 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8137 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8138 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8139 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8140 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8141 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8142 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8143 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8144 /* create random vec */ 8145 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8146 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8147 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8148 /* solve coarse problem */ 8149 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8150 /* set eigenvalue estimation if preonly has not been requested */ 8151 if (compute_eigs) { 8152 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8153 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8154 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8155 if (neigs) { 8156 lambda_max = eigs_r[neigs-1]; 8157 lambda_min = eigs_r[0]; 8158 if (pcbddc->use_coarse_estimates) { 8159 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8160 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8161 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8162 } 8163 } 8164 } 8165 } 8166 8167 /* check coarse problem residual error */ 8168 if (pcbddc->dbg_flag) { 8169 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8170 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8171 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8172 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8173 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8174 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8175 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8176 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8177 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8178 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8179 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8180 if (CoarseNullSpace) { 8181 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8182 } 8183 if (compute_eigs) { 8184 PetscReal lambda_max_s,lambda_min_s; 8185 KSPConvergedReason reason; 8186 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8187 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8188 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8189 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8190 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8191 for (i=0;i<neigs;i++) { 8192 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8193 } 8194 } 8195 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8196 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8197 } 8198 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8199 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8200 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8201 if (compute_eigs) { 8202 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8203 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8204 } 8205 } 8206 } 8207 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8208 /* print additional info */ 8209 if (pcbddc->dbg_flag) { 8210 /* waits until all processes reaches this point */ 8211 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8212 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8214 } 8215 8216 /* free memory */ 8217 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8218 PetscFunctionReturn(0); 8219 } 8220 8221 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8222 { 8223 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8224 PC_IS* pcis = (PC_IS*)pc->data; 8225 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8226 IS subset,subset_mult,subset_n; 8227 PetscInt local_size,coarse_size=0; 8228 PetscInt *local_primal_indices=NULL; 8229 const PetscInt *t_local_primal_indices; 8230 PetscErrorCode ierr; 8231 8232 PetscFunctionBegin; 8233 /* Compute global number of coarse dofs */ 8234 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8235 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8236 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8237 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8238 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8239 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8240 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8241 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8242 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8243 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8244 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8245 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8246 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8247 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8248 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8249 8250 /* check numbering */ 8251 if (pcbddc->dbg_flag) { 8252 PetscScalar coarsesum,*array,*array2; 8253 PetscInt i; 8254 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8255 8256 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8257 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8258 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8259 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8260 /* counter */ 8261 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8262 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8263 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8264 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8265 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8266 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8267 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8268 for (i=0;i<pcbddc->local_primal_size;i++) { 8269 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8270 } 8271 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8272 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8273 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8274 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8275 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8276 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8277 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8278 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8279 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8280 for (i=0;i<pcis->n;i++) { 8281 if (array[i] != 0.0 && array[i] != array2[i]) { 8282 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8283 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8284 set_error = PETSC_TRUE; 8285 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8286 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8287 } 8288 } 8289 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8290 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8291 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8292 for (i=0;i<pcis->n;i++) { 8293 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8294 } 8295 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8296 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8297 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8298 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8299 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8300 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8301 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8302 PetscInt *gidxs; 8303 8304 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8305 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8307 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8308 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8309 for (i=0;i<pcbddc->local_primal_size;i++) { 8310 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8311 } 8312 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8313 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8314 } 8315 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8316 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8317 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8318 } 8319 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8320 /* get back data */ 8321 *coarse_size_n = coarse_size; 8322 *local_primal_indices_n = local_primal_indices; 8323 PetscFunctionReturn(0); 8324 } 8325 8326 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8327 { 8328 IS localis_t; 8329 PetscInt i,lsize,*idxs,n; 8330 PetscScalar *vals; 8331 PetscErrorCode ierr; 8332 8333 PetscFunctionBegin; 8334 /* get indices in local ordering exploiting local to global map */ 8335 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8336 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8337 for (i=0;i<lsize;i++) vals[i] = 1.0; 8338 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8339 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8340 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8341 if (idxs) { /* multilevel guard */ 8342 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8343 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8344 } 8345 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8346 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8347 ierr = PetscFree(vals);CHKERRQ(ierr); 8348 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8349 /* now compute set in local ordering */ 8350 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8351 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8352 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8353 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8354 for (i=0,lsize=0;i<n;i++) { 8355 if (PetscRealPart(vals[i]) > 0.5) { 8356 lsize++; 8357 } 8358 } 8359 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8360 for (i=0,lsize=0;i<n;i++) { 8361 if (PetscRealPart(vals[i]) > 0.5) { 8362 idxs[lsize++] = i; 8363 } 8364 } 8365 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8366 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8367 *localis = localis_t; 8368 PetscFunctionReturn(0); 8369 } 8370 8371 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8372 { 8373 PC_IS *pcis=(PC_IS*)pc->data; 8374 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8375 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8376 Mat S_j; 8377 PetscInt *used_xadj,*used_adjncy; 8378 PetscBool free_used_adj; 8379 PetscErrorCode ierr; 8380 8381 PetscFunctionBegin; 8382 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8383 free_used_adj = PETSC_FALSE; 8384 if (pcbddc->sub_schurs_layers == -1) { 8385 used_xadj = NULL; 8386 used_adjncy = NULL; 8387 } else { 8388 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8389 used_xadj = pcbddc->mat_graph->xadj; 8390 used_adjncy = pcbddc->mat_graph->adjncy; 8391 } else if (pcbddc->computed_rowadj) { 8392 used_xadj = pcbddc->mat_graph->xadj; 8393 used_adjncy = pcbddc->mat_graph->adjncy; 8394 } else { 8395 PetscBool flg_row=PETSC_FALSE; 8396 const PetscInt *xadj,*adjncy; 8397 PetscInt nvtxs; 8398 8399 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8400 if (flg_row) { 8401 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8402 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8403 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8404 free_used_adj = PETSC_TRUE; 8405 } else { 8406 pcbddc->sub_schurs_layers = -1; 8407 used_xadj = NULL; 8408 used_adjncy = NULL; 8409 } 8410 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8411 } 8412 } 8413 8414 /* setup sub_schurs data */ 8415 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8416 if (!sub_schurs->schur_explicit) { 8417 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8418 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8419 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8420 } else { 8421 Mat change = NULL; 8422 Vec scaling = NULL; 8423 IS change_primal = NULL, iP; 8424 PetscInt benign_n; 8425 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8426 PetscBool isseqaij,need_change = PETSC_FALSE; 8427 PetscBool discrete_harmonic = PETSC_FALSE; 8428 8429 if (!pcbddc->use_vertices && reuse_solvers) { 8430 PetscInt n_vertices; 8431 8432 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8433 reuse_solvers = (PetscBool)!n_vertices; 8434 } 8435 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8436 if (!isseqaij) { 8437 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8438 if (matis->A == pcbddc->local_mat) { 8439 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8440 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8441 } else { 8442 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8443 } 8444 } 8445 if (!pcbddc->benign_change_explicit) { 8446 benign_n = pcbddc->benign_n; 8447 } else { 8448 benign_n = 0; 8449 } 8450 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8451 We need a global reduction to avoid possible deadlocks. 8452 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8453 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8454 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8455 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8456 need_change = (PetscBool)(!need_change); 8457 } 8458 /* If the user defines additional constraints, we import them here. 8459 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8460 if (need_change) { 8461 PC_IS *pcisf; 8462 PC_BDDC *pcbddcf; 8463 PC pcf; 8464 8465 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8466 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8467 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8468 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8469 8470 /* hacks */ 8471 pcisf = (PC_IS*)pcf->data; 8472 pcisf->is_B_local = pcis->is_B_local; 8473 pcisf->vec1_N = pcis->vec1_N; 8474 pcisf->BtoNmap = pcis->BtoNmap; 8475 pcisf->n = pcis->n; 8476 pcisf->n_B = pcis->n_B; 8477 pcbddcf = (PC_BDDC*)pcf->data; 8478 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8479 pcbddcf->mat_graph = pcbddc->mat_graph; 8480 pcbddcf->use_faces = PETSC_TRUE; 8481 pcbddcf->use_change_of_basis = PETSC_TRUE; 8482 pcbddcf->use_change_on_faces = PETSC_TRUE; 8483 pcbddcf->use_qr_single = PETSC_TRUE; 8484 pcbddcf->fake_change = PETSC_TRUE; 8485 8486 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8487 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8488 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8489 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8490 change = pcbddcf->ConstraintMatrix; 8491 pcbddcf->ConstraintMatrix = NULL; 8492 8493 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8494 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8495 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8496 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8497 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8498 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8499 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8500 pcf->ops->destroy = NULL; 8501 pcf->ops->reset = NULL; 8502 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8503 } 8504 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8505 8506 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8507 if (iP) { 8508 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8509 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8510 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8511 } 8512 if (discrete_harmonic) { 8513 Mat A; 8514 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8515 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8516 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8517 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8518 ierr = MatDestroy(&A);CHKERRQ(ierr); 8519 } else { 8520 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8521 } 8522 ierr = MatDestroy(&change);CHKERRQ(ierr); 8523 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8524 } 8525 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8526 8527 /* free adjacency */ 8528 if (free_used_adj) { 8529 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8530 } 8531 PetscFunctionReturn(0); 8532 } 8533 8534 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8535 { 8536 PC_IS *pcis=(PC_IS*)pc->data; 8537 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8538 PCBDDCGraph graph; 8539 PetscErrorCode ierr; 8540 8541 PetscFunctionBegin; 8542 /* attach interface graph for determining subsets */ 8543 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8544 IS verticesIS,verticescomm; 8545 PetscInt vsize,*idxs; 8546 8547 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8548 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8549 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8550 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8551 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8552 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8553 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8554 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8555 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8556 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8557 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8558 } else { 8559 graph = pcbddc->mat_graph; 8560 } 8561 /* print some info */ 8562 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8563 IS vertices; 8564 PetscInt nv,nedges,nfaces; 8565 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8566 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8567 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8568 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8569 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8571 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8573 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8574 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8575 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8576 } 8577 8578 /* sub_schurs init */ 8579 if (!pcbddc->sub_schurs) { 8580 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8581 } 8582 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8583 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8584 8585 /* free graph struct */ 8586 if (pcbddc->sub_schurs_rebuild) { 8587 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8588 } 8589 PetscFunctionReturn(0); 8590 } 8591 8592 PetscErrorCode PCBDDCCheckOperator(PC pc) 8593 { 8594 PC_IS *pcis=(PC_IS*)pc->data; 8595 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8596 PetscErrorCode ierr; 8597 8598 PetscFunctionBegin; 8599 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8600 IS zerodiag = NULL; 8601 Mat S_j,B0_B=NULL; 8602 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8603 PetscScalar *p0_check,*array,*array2; 8604 PetscReal norm; 8605 PetscInt i; 8606 8607 /* B0 and B0_B */ 8608 if (zerodiag) { 8609 IS dummy; 8610 8611 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8612 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8613 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8614 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8615 } 8616 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8617 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8618 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8619 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8620 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8621 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8622 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8623 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8624 /* S_j */ 8625 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8626 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8627 8628 /* mimic vector in \widetilde{W}_\Gamma */ 8629 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8630 /* continuous in primal space */ 8631 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8632 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8633 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8634 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8635 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8636 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8637 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8638 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8639 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8640 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8641 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8642 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8643 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8644 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8645 8646 /* assemble rhs for coarse problem */ 8647 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8648 /* local with Schur */ 8649 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8650 if (zerodiag) { 8651 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8652 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8653 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8654 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8655 } 8656 /* sum on primal nodes the local contributions */ 8657 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8658 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8659 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8660 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8661 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8662 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8663 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8664 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8665 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8666 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8667 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8668 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8669 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8670 /* scale primal nodes (BDDC sums contibutions) */ 8671 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8672 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8673 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8674 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8675 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8676 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8677 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8678 /* global: \widetilde{B0}_B w_\Gamma */ 8679 if (zerodiag) { 8680 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8681 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8682 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8683 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8684 } 8685 /* BDDC */ 8686 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8687 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8688 8689 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8690 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8691 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8692 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8693 for (i=0;i<pcbddc->benign_n;i++) { 8694 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8695 } 8696 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8697 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8698 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8699 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8700 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8701 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8702 } 8703 PetscFunctionReturn(0); 8704 } 8705 8706 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8707 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8708 { 8709 Mat At; 8710 IS rows; 8711 PetscInt rst,ren; 8712 PetscErrorCode ierr; 8713 PetscLayout rmap; 8714 8715 PetscFunctionBegin; 8716 rst = ren = 0; 8717 if (ccomm != MPI_COMM_NULL) { 8718 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8719 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8720 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8721 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8722 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8723 } 8724 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8725 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8726 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8727 8728 if (ccomm != MPI_COMM_NULL) { 8729 Mat_MPIAIJ *a,*b; 8730 IS from,to; 8731 Vec gvec; 8732 PetscInt lsize; 8733 8734 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8735 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8736 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8737 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8738 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8739 a = (Mat_MPIAIJ*)At->data; 8740 b = (Mat_MPIAIJ*)(*B)->data; 8741 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8742 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8743 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8744 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8745 b->A = a->A; 8746 b->B = a->B; 8747 8748 b->donotstash = a->donotstash; 8749 b->roworiented = a->roworiented; 8750 b->rowindices = 0; 8751 b->rowvalues = 0; 8752 b->getrowactive = PETSC_FALSE; 8753 8754 (*B)->rmap = rmap; 8755 (*B)->factortype = A->factortype; 8756 (*B)->assembled = PETSC_TRUE; 8757 (*B)->insertmode = NOT_SET_VALUES; 8758 (*B)->preallocated = PETSC_TRUE; 8759 8760 if (a->colmap) { 8761 #if defined(PETSC_USE_CTABLE) 8762 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8763 #else 8764 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8765 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8766 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8767 #endif 8768 } else b->colmap = 0; 8769 if (a->garray) { 8770 PetscInt len; 8771 len = a->B->cmap->n; 8772 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8773 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8774 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8775 } else b->garray = 0; 8776 8777 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8778 b->lvec = a->lvec; 8779 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8780 8781 /* cannot use VecScatterCopy */ 8782 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8783 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8784 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8785 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8786 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8787 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8788 ierr = ISDestroy(&from);CHKERRQ(ierr); 8789 ierr = ISDestroy(&to);CHKERRQ(ierr); 8790 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8791 } 8792 ierr = MatDestroy(&At);CHKERRQ(ierr); 8793 PetscFunctionReturn(0); 8794 } 8795