1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 maxsize = 0; 1528 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1529 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1530 /* create vectors to hold quadrature weights */ 1531 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1532 if (!transpose) { 1533 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1534 } else { 1535 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1536 } 1537 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1538 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1539 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1540 for (i=0;i<maxneighs;i++) { 1541 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1542 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1543 } 1544 1545 /* compute local quad vec */ 1546 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1547 if (!transpose) { 1548 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1549 } else { 1550 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1551 } 1552 ierr = VecSet(p,1.);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1557 } 1558 if (vl2l) { 1559 Mat lA; 1560 VecScatter sc; 1561 1562 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1563 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1564 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1565 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1568 } else { 1569 vins = v; 1570 } 1571 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1572 ierr = VecDestroy(&p);CHKERRQ(ierr); 1573 1574 /* insert in global quadrature vecs */ 1575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1576 for (i=0;i<n_neigh;i++) { 1577 const PetscInt *idxs; 1578 PetscInt idx,nn,j; 1579 1580 idxs = shared[i]; 1581 nn = n_shared[i]; 1582 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1583 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1584 idx = -(idx+1); 1585 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1586 } 1587 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1588 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1589 if (vl2l) { 1590 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1591 } 1592 ierr = VecDestroy(&v);CHKERRQ(ierr); 1593 ierr = PetscFree(vals);CHKERRQ(ierr); 1594 1595 /* assemble near null space */ 1596 for (i=0;i<maxneighs;i++) { 1597 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1598 } 1599 for (i=0;i<maxneighs;i++) { 1600 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1601 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1602 } 1603 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1608 { 1609 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 if (primalv) { 1614 if (pcbddc->user_primal_vertices_local) { 1615 IS list[2], newp; 1616 1617 list[0] = primalv; 1618 list[1] = pcbddc->user_primal_vertices_local; 1619 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1620 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1621 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1622 pcbddc->user_primal_vertices_local = newp; 1623 } else { 1624 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 PetscBool monolithic = PETSC_FALSE; 1637 1638 PetscFunctionBegin; 1639 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1640 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1642 /* need to convert from global to local topology information and remove references to information in global ordering */ 1643 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1644 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1645 if (monolithic) { /* just get block size to properly compute vertices */ 1646 if (pcbddc->vertex_size == 1) { 1647 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1648 } 1649 goto boundary; 1650 } 1651 1652 if (pcbddc->user_provided_isfordofs) { 1653 if (pcbddc->n_ISForDofs) { 1654 PetscInt i; 1655 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1656 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1657 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1658 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1659 } 1660 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1661 pcbddc->n_ISForDofs = 0; 1662 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1663 } 1664 } else { 1665 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1666 DM dm; 1667 1668 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1669 if (!dm) { 1670 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1671 } 1672 if (dm) { 1673 IS *fields; 1674 PetscInt nf,i; 1675 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1676 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1677 for (i=0;i<nf;i++) { 1678 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1679 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1680 } 1681 ierr = PetscFree(fields);CHKERRQ(ierr); 1682 pcbddc->n_ISForDofsLocal = nf; 1683 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1684 PetscContainer c; 1685 1686 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1687 if (c) { 1688 MatISLocalFields lf; 1689 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1690 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1691 } else { /* fallback, create the default fields if bs > 1 */ 1692 PetscInt i, n = matis->A->rmap->n; 1693 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1694 if (i > 1) { 1695 pcbddc->n_ISForDofsLocal = i; 1696 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1697 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1698 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 } 1703 } else { 1704 PetscInt i; 1705 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1706 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1707 } 1708 } 1709 } 1710 1711 boundary: 1712 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1713 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1714 } else if (pcbddc->DirichletBoundariesLocal) { 1715 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1716 } 1717 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1718 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1719 } else if (pcbddc->NeumannBoundariesLocal) { 1720 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1721 } 1722 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1723 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1724 } 1725 ierr = VecDestroy(&global);CHKERRQ(ierr); 1726 ierr = VecDestroy(&local);CHKERRQ(ierr); 1727 /* detect local disconnected subdomains if requested (use matis->A) */ 1728 if (pcbddc->detect_disconnected) { 1729 IS primalv = NULL; 1730 PetscInt i; 1731 1732 for (i=0;i<pcbddc->n_local_subs;i++) { 1733 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1734 } 1735 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1736 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1737 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1738 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1739 } 1740 /* early stage corner detection */ 1741 { 1742 DM dm; 1743 1744 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1745 if (dm) { 1746 PetscBool isda; 1747 1748 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1749 if (isda) { 1750 ISLocalToGlobalMapping l2l; 1751 IS corners; 1752 Mat lA; 1753 1754 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1755 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1756 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1757 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1758 if (l2l) { 1759 const PetscInt *idx; 1760 PetscInt bs,*idxout,n; 1761 1762 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1763 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1764 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1765 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1766 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1767 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1768 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1769 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1770 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1771 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1772 } else { /* not from DMDA */ 1773 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1774 } 1775 } 1776 } 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1782 { 1783 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1784 PetscErrorCode ierr; 1785 IS nis; 1786 const PetscInt *idxs; 1787 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1788 PetscBool *ld; 1789 1790 PetscFunctionBegin; 1791 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1792 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1793 if (mop == MPI_LAND) { 1794 /* init rootdata with true */ 1795 ld = (PetscBool*) matis->sf_rootdata; 1796 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1797 } else { 1798 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1799 } 1800 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1801 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1802 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1803 ld = (PetscBool*) matis->sf_leafdata; 1804 for (i=0;i<nd;i++) 1805 if (-1 < idxs[i] && idxs[i] < n) 1806 ld[idxs[i]] = PETSC_TRUE; 1807 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1808 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1809 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1810 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1811 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1812 if (mop == MPI_LAND) { 1813 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1814 } else { 1815 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1816 } 1817 for (i=0,nnd=0;i<n;i++) 1818 if (ld[i]) 1819 nidxs[nnd++] = i; 1820 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1821 ierr = ISDestroy(is);CHKERRQ(ierr); 1822 *is = nis; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1827 { 1828 PC_IS *pcis = (PC_IS*)(pc->data); 1829 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 if (!pcbddc->benign_have_null) { 1834 PetscFunctionReturn(0); 1835 } 1836 if (pcbddc->ChangeOfBasisMatrix) { 1837 Vec swap; 1838 1839 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1840 swap = pcbddc->work_change; 1841 pcbddc->work_change = r; 1842 r = swap; 1843 } 1844 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1847 ierr = VecSet(z,0.);CHKERRQ(ierr); 1848 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1849 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 if (pcbddc->ChangeOfBasisMatrix) { 1851 pcbddc->work_change = r; 1852 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1853 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1859 { 1860 PCBDDCBenignMatMult_ctx ctx; 1861 PetscErrorCode ierr; 1862 PetscBool apply_right,apply_left,reset_x; 1863 1864 PetscFunctionBegin; 1865 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1866 if (transpose) { 1867 apply_right = ctx->apply_left; 1868 apply_left = ctx->apply_right; 1869 } else { 1870 apply_right = ctx->apply_right; 1871 apply_left = ctx->apply_left; 1872 } 1873 reset_x = PETSC_FALSE; 1874 if (apply_right) { 1875 const PetscScalar *ax; 1876 PetscInt nl,i; 1877 1878 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1879 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1880 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1881 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1882 for (i=0;i<ctx->benign_n;i++) { 1883 PetscScalar sum,val; 1884 const PetscInt *idxs; 1885 PetscInt nz,j; 1886 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1887 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1888 sum = 0.; 1889 if (ctx->apply_p0) { 1890 val = ctx->work[idxs[nz-1]]; 1891 for (j=0;j<nz-1;j++) { 1892 sum += ctx->work[idxs[j]]; 1893 ctx->work[idxs[j]] += val; 1894 } 1895 } else { 1896 for (j=0;j<nz-1;j++) { 1897 sum += ctx->work[idxs[j]]; 1898 } 1899 } 1900 ctx->work[idxs[nz-1]] -= sum; 1901 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1902 } 1903 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1904 reset_x = PETSC_TRUE; 1905 } 1906 if (transpose) { 1907 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1908 } else { 1909 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1910 } 1911 if (reset_x) { 1912 ierr = VecResetArray(x);CHKERRQ(ierr); 1913 } 1914 if (apply_left) { 1915 PetscScalar *ay; 1916 PetscInt i; 1917 1918 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1919 for (i=0;i<ctx->benign_n;i++) { 1920 PetscScalar sum,val; 1921 const PetscInt *idxs; 1922 PetscInt nz,j; 1923 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1924 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1925 val = -ay[idxs[nz-1]]; 1926 if (ctx->apply_p0) { 1927 sum = 0.; 1928 for (j=0;j<nz-1;j++) { 1929 sum += ay[idxs[j]]; 1930 ay[idxs[j]] += val; 1931 } 1932 ay[idxs[nz-1]] += sum; 1933 } else { 1934 for (j=0;j<nz-1;j++) { 1935 ay[idxs[j]] += val; 1936 } 1937 ay[idxs[nz-1]] = 0.; 1938 } 1939 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1940 } 1941 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1947 { 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1965 { 1966 PC_IS *pcis = (PC_IS*)pc->data; 1967 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1968 PCBDDCBenignMatMult_ctx ctx; 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 if (!restore) { 1973 Mat A_IB,A_BI; 1974 PetscScalar *work; 1975 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1976 1977 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1978 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1979 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1980 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1981 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1982 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1983 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1984 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1985 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1986 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1987 ctx->apply_left = PETSC_TRUE; 1988 ctx->apply_right = PETSC_FALSE; 1989 ctx->apply_p0 = PETSC_FALSE; 1990 ctx->benign_n = pcbddc->benign_n; 1991 if (reuse) { 1992 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1993 ctx->free = PETSC_FALSE; 1994 } else { /* TODO: could be optimized for successive solves */ 1995 ISLocalToGlobalMapping N_to_D; 1996 PetscInt i; 1997 1998 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1999 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2000 for (i=0;i<pcbddc->benign_n;i++) { 2001 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2002 } 2003 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2004 ctx->free = PETSC_TRUE; 2005 } 2006 ctx->A = pcis->A_IB; 2007 ctx->work = work; 2008 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2009 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2011 pcis->A_IB = A_IB; 2012 2013 /* A_BI as A_IB^T */ 2014 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2015 pcbddc->benign_original_mat = pcis->A_BI; 2016 pcis->A_BI = A_BI; 2017 } else { 2018 if (!pcbddc->benign_original_mat) { 2019 PetscFunctionReturn(0); 2020 } 2021 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2022 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2023 pcis->A_IB = ctx->A; 2024 ctx->A = NULL; 2025 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2026 pcis->A_BI = pcbddc->benign_original_mat; 2027 pcbddc->benign_original_mat = NULL; 2028 if (ctx->free) { 2029 PetscInt i; 2030 for (i=0;i<ctx->benign_n;i++) { 2031 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2032 } 2033 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2036 ierr = PetscFree(ctx);CHKERRQ(ierr); 2037 } 2038 PetscFunctionReturn(0); 2039 } 2040 2041 /* used just in bddc debug mode */ 2042 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2043 { 2044 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2045 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2046 Mat An; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2051 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2052 if (is1) { 2053 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2054 ierr = MatDestroy(&An);CHKERRQ(ierr); 2055 } else { 2056 *B = An; 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /* TODO: add reuse flag */ 2062 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2063 { 2064 Mat Bt; 2065 PetscScalar *a,*bdata; 2066 const PetscInt *ii,*ij; 2067 PetscInt m,n,i,nnz,*bii,*bij; 2068 PetscBool flg_row; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2073 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2074 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2075 nnz = n; 2076 for (i=0;i<ii[n];i++) { 2077 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2078 } 2079 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2080 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2081 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2082 nnz = 0; 2083 bii[0] = 0; 2084 for (i=0;i<n;i++) { 2085 PetscInt j; 2086 for (j=ii[i];j<ii[i+1];j++) { 2087 PetscScalar entry = a[j]; 2088 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2089 bij[nnz] = ij[j]; 2090 bdata[nnz] = entry; 2091 nnz++; 2092 } 2093 } 2094 bii[i+1] = nnz; 2095 } 2096 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2097 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2098 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2099 { 2100 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2101 b->free_a = PETSC_TRUE; 2102 b->free_ij = PETSC_TRUE; 2103 } 2104 *B = Bt; 2105 PetscFunctionReturn(0); 2106 } 2107 2108 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2109 { 2110 Mat B = NULL; 2111 DM dm; 2112 IS is_dummy,*cc_n; 2113 ISLocalToGlobalMapping l2gmap_dummy; 2114 PCBDDCGraph graph; 2115 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2116 PetscInt i,n; 2117 PetscInt *xadj,*adjncy; 2118 PetscBool isplex = PETSC_FALSE; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 if (ncc) *ncc = 0; 2123 if (cc) *cc = NULL; 2124 if (primalv) *primalv = NULL; 2125 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2126 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2127 if (!dm) { 2128 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2129 } 2130 if (dm) { 2131 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2132 } 2133 if (isplex) { /* this code has been modified from plexpartition.c */ 2134 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2135 PetscInt *adj = NULL; 2136 IS cellNumbering; 2137 const PetscInt *cellNum; 2138 PetscBool useCone, useClosure; 2139 PetscSection section; 2140 PetscSegBuffer adjBuffer; 2141 PetscSF sfPoint; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2146 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2147 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2148 /* Build adjacency graph via a section/segbuffer */ 2149 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2150 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2151 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2152 /* Always use FVM adjacency to create partitioner graph */ 2153 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2154 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2155 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2156 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2157 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2158 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2159 for (n = 0, p = pStart; p < pEnd; p++) { 2160 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2161 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2162 adjSize = PETSC_DETERMINE; 2163 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2164 for (a = 0; a < adjSize; ++a) { 2165 const PetscInt point = adj[a]; 2166 if (pStart <= point && point < pEnd) { 2167 PetscInt *PETSC_RESTRICT pBuf; 2168 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2169 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2170 *pBuf = point; 2171 } 2172 } 2173 n++; 2174 } 2175 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2176 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2177 /* Derive CSR graph from section/segbuffer */ 2178 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2179 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2180 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2181 for (idx = 0, p = pStart; p < pEnd; p++) { 2182 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2183 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2184 } 2185 xadj[n] = size; 2186 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2187 /* Clean up */ 2188 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2189 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2190 ierr = PetscFree(adj);CHKERRQ(ierr); 2191 graph->xadj = xadj; 2192 graph->adjncy = adjncy; 2193 } else { 2194 Mat A; 2195 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2196 2197 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2198 if (!A->rmap->N || !A->cmap->N) { 2199 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2203 if (!isseqaij && filter) { 2204 PetscBool isseqdense; 2205 2206 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2207 if (!isseqdense) { 2208 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2209 } else { /* TODO: rectangular case and LDA */ 2210 PetscScalar *array; 2211 PetscReal chop=1.e-6; 2212 2213 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2214 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2215 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2216 for (i=0;i<n;i++) { 2217 PetscInt j; 2218 for (j=i+1;j<n;j++) { 2219 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2220 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2221 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2222 } 2223 } 2224 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2225 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2226 } 2227 } else { 2228 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2229 B = A; 2230 } 2231 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2232 2233 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2234 if (filter) { 2235 PetscScalar *data; 2236 PetscInt j,cum; 2237 2238 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2239 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2240 cum = 0; 2241 for (i=0;i<n;i++) { 2242 PetscInt t; 2243 2244 for (j=xadj[i];j<xadj[i+1];j++) { 2245 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2246 continue; 2247 } 2248 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2249 } 2250 t = xadj_filtered[i]; 2251 xadj_filtered[i] = cum; 2252 cum += t; 2253 } 2254 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2255 graph->xadj = xadj_filtered; 2256 graph->adjncy = adjncy_filtered; 2257 } else { 2258 graph->xadj = xadj; 2259 graph->adjncy = adjncy; 2260 } 2261 } 2262 /* compute local connected components using PCBDDCGraph */ 2263 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2264 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2265 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2266 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2267 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2268 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2269 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2270 2271 /* partial clean up */ 2272 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2273 if (B) { 2274 PetscBool flg_row; 2275 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2276 ierr = MatDestroy(&B);CHKERRQ(ierr); 2277 } 2278 if (isplex) { 2279 ierr = PetscFree(xadj);CHKERRQ(ierr); 2280 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2281 } 2282 2283 /* get back data */ 2284 if (isplex) { 2285 if (ncc) *ncc = graph->ncc; 2286 if (cc || primalv) { 2287 Mat A; 2288 PetscBT btv,btvt; 2289 PetscSection subSection; 2290 PetscInt *ids,cum,cump,*cids,*pids; 2291 2292 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2293 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2294 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2295 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2296 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2297 2298 cids[0] = 0; 2299 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2300 PetscInt j; 2301 2302 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2303 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2304 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2305 2306 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2307 for (k = 0; k < 2*size; k += 2) { 2308 PetscInt s, p = closure[k], off, dof, cdof; 2309 2310 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2311 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2312 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2313 for (s = 0; s < dof-cdof; s++) { 2314 if (PetscBTLookupSet(btvt,off+s)) continue; 2315 if (!PetscBTLookup(btv,off+s)) { 2316 ids[cum++] = off+s; 2317 } else { /* cross-vertex */ 2318 pids[cump++] = off+s; 2319 } 2320 } 2321 } 2322 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2323 } 2324 cids[i+1] = cum; 2325 /* mark dofs as already assigned */ 2326 for (j = cids[i]; j < cids[i+1]; j++) { 2327 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2328 } 2329 } 2330 if (cc) { 2331 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2332 for (i = 0; i < graph->ncc; i++) { 2333 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2334 } 2335 *cc = cc_n; 2336 } 2337 if (primalv) { 2338 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2339 } 2340 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2341 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2342 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2343 } 2344 } else { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc) { 2347 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2348 for (i=0;i<graph->ncc;i++) { 2349 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2350 } 2351 *cc = cc_n; 2352 } 2353 } 2354 /* clean up graph */ 2355 graph->xadj = 0; 2356 graph->adjncy = 0; 2357 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2362 { 2363 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2364 PC_IS* pcis = (PC_IS*)(pc->data); 2365 IS dirIS = NULL; 2366 PetscInt i; 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2371 if (zerodiag) { 2372 Mat A; 2373 Vec vec3_N; 2374 PetscScalar *vals; 2375 const PetscInt *idxs; 2376 PetscInt nz,*count; 2377 2378 /* p0 */ 2379 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2380 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2381 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2382 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2383 for (i=0;i<nz;i++) vals[i] = 1.; 2384 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2385 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2386 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2387 /* v_I */ 2388 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2389 for (i=0;i<nz;i++) vals[i] = 0.; 2390 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2391 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2392 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2393 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2394 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2395 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2396 if (dirIS) { 2397 PetscInt n; 2398 2399 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2400 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2401 for (i=0;i<n;i++) vals[i] = 0.; 2402 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2403 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2404 } 2405 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2406 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2407 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2408 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2409 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2410 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2411 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2412 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2413 ierr = PetscFree(vals);CHKERRQ(ierr); 2414 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2415 2416 /* there should not be any pressure dofs lying on the interface */ 2417 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2419 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2420 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2421 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2422 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2423 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2424 ierr = PetscFree(count);CHKERRQ(ierr); 2425 } 2426 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2427 2428 /* check PCBDDCBenignGetOrSetP0 */ 2429 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2430 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2431 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2432 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2433 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2434 for (i=0;i<pcbddc->benign_n;i++) { 2435 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2436 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2437 } 2438 PetscFunctionReturn(0); 2439 } 2440 2441 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2442 { 2443 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2444 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2445 PetscInt nz,n; 2446 PetscInt *interior_dofs,n_interior_dofs,nneu; 2447 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2452 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2453 for (n=0;n<pcbddc->benign_n;n++) { 2454 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2455 } 2456 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2457 pcbddc->benign_n = 0; 2458 2459 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2460 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2461 Checks if all the pressure dofs in each subdomain have a zero diagonal 2462 If not, a change of basis on pressures is not needed 2463 since the local Schur complements are already SPD 2464 */ 2465 has_null_pressures = PETSC_TRUE; 2466 have_null = PETSC_TRUE; 2467 if (pcbddc->n_ISForDofsLocal) { 2468 IS iP = NULL; 2469 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2470 2471 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2472 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2473 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2474 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2475 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2476 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2477 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2478 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2479 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2480 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2481 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2482 if (iP) { 2483 IS newpressures; 2484 2485 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2486 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2487 pressures = newpressures; 2488 } 2489 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2490 if (!sorted) { 2491 ierr = ISSort(pressures);CHKERRQ(ierr); 2492 } 2493 } else { 2494 pressures = NULL; 2495 } 2496 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2497 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2498 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2499 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2500 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2501 if (!sorted) { 2502 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2503 } 2504 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2505 zerodiag_save = zerodiag; 2506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2507 if (!nz) { 2508 if (n) have_null = PETSC_FALSE; 2509 has_null_pressures = PETSC_FALSE; 2510 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2511 } 2512 recompute_zerodiag = PETSC_FALSE; 2513 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2514 zerodiag_subs = NULL; 2515 pcbddc->benign_n = 0; 2516 n_interior_dofs = 0; 2517 interior_dofs = NULL; 2518 nneu = 0; 2519 if (pcbddc->NeumannBoundariesLocal) { 2520 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2521 } 2522 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2523 if (checkb) { /* need to compute interior nodes */ 2524 PetscInt n,i,j; 2525 PetscInt n_neigh,*neigh,*n_shared,**shared; 2526 PetscInt *iwork; 2527 2528 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2529 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2530 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2531 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2532 for (i=1;i<n_neigh;i++) 2533 for (j=0;j<n_shared[i];j++) 2534 iwork[shared[i][j]] += 1; 2535 for (i=0;i<n;i++) 2536 if (!iwork[i]) 2537 interior_dofs[n_interior_dofs++] = i; 2538 ierr = PetscFree(iwork);CHKERRQ(ierr); 2539 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2540 } 2541 if (has_null_pressures) { 2542 IS *subs; 2543 PetscInt nsubs,i,j,nl; 2544 const PetscInt *idxs; 2545 PetscScalar *array; 2546 Vec *work; 2547 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2548 2549 subs = pcbddc->local_subs; 2550 nsubs = pcbddc->n_local_subs; 2551 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2552 if (checkb) { 2553 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2554 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2555 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2556 /* work[0] = 1_p */ 2557 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2558 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2559 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2560 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2561 /* work[0] = 1_v */ 2562 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2563 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2564 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2565 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2566 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2567 } 2568 if (nsubs > 1) { 2569 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2570 for (i=0;i<nsubs;i++) { 2571 ISLocalToGlobalMapping l2g; 2572 IS t_zerodiag_subs; 2573 PetscInt nl; 2574 2575 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2577 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2578 if (nl) { 2579 PetscBool valid = PETSC_TRUE; 2580 2581 if (checkb) { 2582 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2583 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2584 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2585 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2586 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2587 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2588 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2589 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2590 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2591 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2592 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2593 for (j=0;j<n_interior_dofs;j++) { 2594 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2595 valid = PETSC_FALSE; 2596 break; 2597 } 2598 } 2599 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2600 } 2601 if (valid && nneu) { 2602 const PetscInt *idxs; 2603 PetscInt nzb; 2604 2605 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2606 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2607 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2608 if (nzb) valid = PETSC_FALSE; 2609 } 2610 if (valid && pressures) { 2611 IS t_pressure_subs; 2612 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2613 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2614 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2615 } 2616 if (valid) { 2617 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2618 pcbddc->benign_n++; 2619 } else { 2620 recompute_zerodiag = PETSC_TRUE; 2621 } 2622 } 2623 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2624 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2625 } 2626 } else { /* there's just one subdomain (or zero if they have not been detected */ 2627 PetscBool valid = PETSC_TRUE; 2628 2629 if (nneu) valid = PETSC_FALSE; 2630 if (valid && pressures) { 2631 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2632 } 2633 if (valid && checkb) { 2634 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2635 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2636 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2637 for (j=0;j<n_interior_dofs;j++) { 2638 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2639 valid = PETSC_FALSE; 2640 break; 2641 } 2642 } 2643 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2644 } 2645 if (valid) { 2646 pcbddc->benign_n = 1; 2647 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2648 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2649 zerodiag_subs[0] = zerodiag; 2650 } 2651 } 2652 if (checkb) { 2653 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2654 } 2655 } 2656 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2657 2658 if (!pcbddc->benign_n) { 2659 PetscInt n; 2660 2661 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2662 recompute_zerodiag = PETSC_FALSE; 2663 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2664 if (n) { 2665 has_null_pressures = PETSC_FALSE; 2666 have_null = PETSC_FALSE; 2667 } 2668 } 2669 2670 /* final check for null pressures */ 2671 if (zerodiag && pressures) { 2672 PetscInt nz,np; 2673 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2674 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2675 if (nz != np) have_null = PETSC_FALSE; 2676 } 2677 2678 if (recompute_zerodiag) { 2679 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2680 if (pcbddc->benign_n == 1) { 2681 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2682 zerodiag = zerodiag_subs[0]; 2683 } else { 2684 PetscInt i,nzn,*new_idxs; 2685 2686 nzn = 0; 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscInt ns; 2689 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2690 nzn += ns; 2691 } 2692 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2693 nzn = 0; 2694 for (i=0;i<pcbddc->benign_n;i++) { 2695 PetscInt ns,*idxs; 2696 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2697 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2698 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2699 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2700 nzn += ns; 2701 } 2702 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2703 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2704 } 2705 have_null = PETSC_FALSE; 2706 } 2707 2708 /* Prepare matrix to compute no-net-flux */ 2709 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2710 Mat A,loc_divudotp; 2711 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2712 IS row,col,isused = NULL; 2713 PetscInt M,N,n,st,n_isused; 2714 2715 if (pressures) { 2716 isused = pressures; 2717 } else { 2718 isused = zerodiag_save; 2719 } 2720 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2721 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2722 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2723 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2724 n_isused = 0; 2725 if (isused) { 2726 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2727 } 2728 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2729 st = st-n_isused; 2730 if (n) { 2731 const PetscInt *gidxs; 2732 2733 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2734 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2735 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2736 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2738 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2739 } else { 2740 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2741 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2742 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2743 } 2744 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2745 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2746 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2747 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2748 ierr = ISDestroy(&row);CHKERRQ(ierr); 2749 ierr = ISDestroy(&col);CHKERRQ(ierr); 2750 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2751 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2752 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2753 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2754 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2755 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2756 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2757 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2758 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 } 2761 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2762 2763 /* change of basis and p0 dofs */ 2764 if (has_null_pressures) { 2765 IS zerodiagc; 2766 const PetscInt *idxs,*idxsc; 2767 PetscInt i,s,*nnz; 2768 2769 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2770 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2771 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2772 /* local change of basis for pressures */ 2773 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2774 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2775 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2776 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2777 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2778 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2779 for (i=0;i<pcbddc->benign_n;i++) { 2780 PetscInt nzs,j; 2781 2782 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2783 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2784 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2785 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2786 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2787 } 2788 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2789 ierr = PetscFree(nnz);CHKERRQ(ierr); 2790 /* set identity on velocities */ 2791 for (i=0;i<n-nz;i++) { 2792 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2795 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2796 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2797 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2798 /* set change on pressures */ 2799 for (s=0;s<pcbddc->benign_n;s++) { 2800 PetscScalar *array; 2801 PetscInt nzs; 2802 2803 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2804 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2805 for (i=0;i<nzs-1;i++) { 2806 PetscScalar vals[2]; 2807 PetscInt cols[2]; 2808 2809 cols[0] = idxs[i]; 2810 cols[1] = idxs[nzs-1]; 2811 vals[0] = 1.; 2812 vals[1] = 1.; 2813 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2814 } 2815 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2816 for (i=0;i<nzs-1;i++) array[i] = -1.; 2817 array[nzs-1] = 1.; 2818 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2819 /* store local idxs for p0 */ 2820 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2821 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2822 ierr = PetscFree(array);CHKERRQ(ierr); 2823 } 2824 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2825 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2826 /* project if needed */ 2827 if (pcbddc->benign_change_explicit) { 2828 Mat M; 2829 2830 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2831 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2832 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2833 ierr = MatDestroy(&M);CHKERRQ(ierr); 2834 } 2835 /* store global idxs for p0 */ 2836 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2837 } 2838 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2839 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2840 2841 /* determines if the coarse solver will be singular or not */ 2842 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2843 /* determines if the problem has subdomains with 0 pressure block */ 2844 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2845 *zerodiaglocal = zerodiag; 2846 PetscFunctionReturn(0); 2847 } 2848 2849 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2850 { 2851 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2852 PetscScalar *array; 2853 PetscErrorCode ierr; 2854 2855 PetscFunctionBegin; 2856 if (!pcbddc->benign_sf) { 2857 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2858 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2859 } 2860 if (get) { 2861 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2862 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2863 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2864 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2865 } else { 2866 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2867 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2868 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2870 } 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2875 { 2876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2877 PetscErrorCode ierr; 2878 2879 PetscFunctionBegin; 2880 /* TODO: add error checking 2881 - avoid nested pop (or push) calls. 2882 - cannot push before pop. 2883 - cannot call this if pcbddc->local_mat is NULL 2884 */ 2885 if (!pcbddc->benign_n) { 2886 PetscFunctionReturn(0); 2887 } 2888 if (pop) { 2889 if (pcbddc->benign_change_explicit) { 2890 IS is_p0; 2891 MatReuse reuse; 2892 2893 /* extract B_0 */ 2894 reuse = MAT_INITIAL_MATRIX; 2895 if (pcbddc->benign_B0) { 2896 reuse = MAT_REUSE_MATRIX; 2897 } 2898 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2899 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2900 /* remove rows and cols from local problem */ 2901 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2902 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2903 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2904 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2905 } else { 2906 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2907 PetscScalar *vals; 2908 PetscInt i,n,*idxs_ins; 2909 2910 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2911 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2912 if (!pcbddc->benign_B0) { 2913 PetscInt *nnz; 2914 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2915 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2916 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2917 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2918 for (i=0;i<pcbddc->benign_n;i++) { 2919 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2920 nnz[i] = n - nnz[i]; 2921 } 2922 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2923 ierr = PetscFree(nnz);CHKERRQ(ierr); 2924 } 2925 2926 for (i=0;i<pcbddc->benign_n;i++) { 2927 PetscScalar *array; 2928 PetscInt *idxs,j,nz,cum; 2929 2930 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2931 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2932 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2933 for (j=0;j<nz;j++) vals[j] = 1.; 2934 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2935 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2936 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2937 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2938 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2939 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2940 cum = 0; 2941 for (j=0;j<n;j++) { 2942 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2943 vals[cum] = array[j]; 2944 idxs_ins[cum] = j; 2945 cum++; 2946 } 2947 } 2948 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2950 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2951 } 2952 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2955 } 2956 } else { /* push */ 2957 if (pcbddc->benign_change_explicit) { 2958 PetscInt i; 2959 2960 for (i=0;i<pcbddc->benign_n;i++) { 2961 PetscScalar *B0_vals; 2962 PetscInt *B0_cols,B0_ncol; 2963 2964 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2965 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2966 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2967 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2968 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2969 } 2970 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 } else { 2973 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2974 } 2975 } 2976 PetscFunctionReturn(0); 2977 } 2978 2979 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2980 { 2981 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2982 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2983 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2984 PetscBLASInt *B_iwork,*B_ifail; 2985 PetscScalar *work,lwork; 2986 PetscScalar *St,*S,*eigv; 2987 PetscScalar *Sarray,*Starray; 2988 PetscReal *eigs,thresh,lthresh,uthresh; 2989 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2990 PetscBool allocated_S_St; 2991 #if defined(PETSC_USE_COMPLEX) 2992 PetscReal *rwork; 2993 #endif 2994 PetscErrorCode ierr; 2995 2996 PetscFunctionBegin; 2997 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2998 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2999 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3000 3001 if (pcbddc->dbg_flag) { 3002 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3005 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 } 3007 3008 if (pcbddc->dbg_flag) { 3009 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3010 } 3011 3012 /* max size of subsets */ 3013 mss = 0; 3014 for (i=0;i<sub_schurs->n_subs;i++) { 3015 PetscInt subset_size; 3016 3017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3018 mss = PetscMax(mss,subset_size); 3019 } 3020 3021 /* min/max and threshold */ 3022 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3023 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3024 nmax = PetscMax(nmin,nmax); 3025 allocated_S_St = PETSC_FALSE; 3026 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3027 allocated_S_St = PETSC_TRUE; 3028 } 3029 3030 /* allocate lapack workspace */ 3031 cum = cum2 = 0; 3032 maxneigs = 0; 3033 for (i=0;i<sub_schurs->n_subs;i++) { 3034 PetscInt n,subset_size; 3035 3036 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3037 n = PetscMin(subset_size,nmax); 3038 cum += subset_size; 3039 cum2 += subset_size*n; 3040 maxneigs = PetscMax(maxneigs,n); 3041 } 3042 if (mss) { 3043 if (sub_schurs->is_symmetric) { 3044 PetscBLASInt B_itype = 1; 3045 PetscBLASInt B_N = mss; 3046 PetscReal zero = 0.0; 3047 PetscReal eps = 0.0; /* dlamch? */ 3048 3049 B_lwork = -1; 3050 S = NULL; 3051 St = NULL; 3052 eigs = NULL; 3053 eigv = NULL; 3054 B_iwork = NULL; 3055 B_ifail = NULL; 3056 #if defined(PETSC_USE_COMPLEX) 3057 rwork = NULL; 3058 #endif 3059 thresh = 1.0; 3060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3061 #if defined(PETSC_USE_COMPLEX) 3062 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3063 #else 3064 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3065 #endif 3066 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3068 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3069 } else { 3070 lwork = 0; 3071 } 3072 3073 nv = 0; 3074 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3075 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3076 } 3077 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3078 if (allocated_S_St) { 3079 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3080 } 3081 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3082 #if defined(PETSC_USE_COMPLEX) 3083 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3084 #endif 3085 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3086 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3087 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3088 nv+cum,&pcbddc->adaptive_constraints_idxs, 3089 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3090 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3091 3092 maxneigs = 0; 3093 cum = cumarray = 0; 3094 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3095 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3096 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3097 const PetscInt *idxs; 3098 3099 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3100 for (cum=0;cum<nv;cum++) { 3101 pcbddc->adaptive_constraints_n[cum] = 1; 3102 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3103 pcbddc->adaptive_constraints_data[cum] = 1.0; 3104 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3105 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3106 } 3107 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3108 } 3109 3110 if (mss) { /* multilevel */ 3111 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3112 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3113 } 3114 3115 lthresh = pcbddc->adaptive_threshold[0]; 3116 uthresh = pcbddc->adaptive_threshold[1]; 3117 for (i=0;i<sub_schurs->n_subs;i++) { 3118 const PetscInt *idxs; 3119 PetscReal upper,lower; 3120 PetscInt j,subset_size,eigs_start = 0; 3121 PetscBLASInt B_N; 3122 PetscBool same_data = PETSC_FALSE; 3123 PetscBool scal = PETSC_FALSE; 3124 3125 if (pcbddc->use_deluxe_scaling) { 3126 upper = PETSC_MAX_REAL; 3127 lower = uthresh; 3128 } else { 3129 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3130 upper = 1./uthresh; 3131 lower = 0.; 3132 } 3133 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3134 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3135 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3136 /* this is experimental: we assume the dofs have been properly grouped to have 3137 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3138 if (!sub_schurs->is_posdef) { 3139 Mat T; 3140 3141 for (j=0;j<subset_size;j++) { 3142 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3143 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3144 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3145 ierr = MatDestroy(&T);CHKERRQ(ierr); 3146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3147 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3148 ierr = MatDestroy(&T);CHKERRQ(ierr); 3149 if (sub_schurs->change_primal_sub) { 3150 PetscInt nz,k; 3151 const PetscInt *idxs; 3152 3153 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3154 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3155 for (k=0;k<nz;k++) { 3156 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3157 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3158 } 3159 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3160 } 3161 scal = PETSC_TRUE; 3162 break; 3163 } 3164 } 3165 } 3166 3167 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3168 if (sub_schurs->is_symmetric) { 3169 PetscInt j,k; 3170 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3171 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3172 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3173 } 3174 for (j=0;j<subset_size;j++) { 3175 for (k=j;k<subset_size;k++) { 3176 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3177 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3178 } 3179 } 3180 } else { 3181 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3182 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3183 } 3184 } else { 3185 S = Sarray + cumarray; 3186 St = Starray + cumarray; 3187 } 3188 /* see if we can save some work */ 3189 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3190 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3191 } 3192 3193 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3194 B_neigs = 0; 3195 } else { 3196 if (sub_schurs->is_symmetric) { 3197 PetscBLASInt B_itype = 1; 3198 PetscBLASInt B_IL, B_IU; 3199 PetscReal eps = -1.0; /* dlamch? */ 3200 PetscInt nmin_s; 3201 PetscBool compute_range; 3202 3203 compute_range = (PetscBool)!same_data; 3204 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3205 3206 if (pcbddc->dbg_flag) { 3207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range);CHKERRQ(ierr); 3208 } 3209 3210 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3211 if (compute_range) { 3212 3213 /* ask for eigenvalues larger than thresh */ 3214 if (sub_schurs->is_posdef) { 3215 #if defined(PETSC_USE_COMPLEX) 3216 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3217 #else 3218 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3219 #endif 3220 } else { /* no theory so far, but it works nicely */ 3221 PetscInt recipe = 0; 3222 PetscReal bb[2]; 3223 3224 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3225 switch (recipe) { 3226 case 0: 3227 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3228 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3229 #if defined(PETSC_USE_COMPLEX) 3230 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3231 #else 3232 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3233 #endif 3234 break; 3235 case 1: 3236 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3237 #if defined(PETSC_USE_COMPLEX) 3238 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3239 #else 3240 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3241 #endif 3242 if (!scal) { 3243 PetscBLASInt B_neigs2; 3244 3245 bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; 3246 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3247 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3248 #if defined(PETSC_USE_COMPLEX) 3249 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3250 #else 3251 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3252 #endif 3253 B_neigs += B_neigs2; 3254 } 3255 break; 3256 default: 3257 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3258 break; 3259 } 3260 } 3261 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3262 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3263 B_IL = 1; 3264 #if defined(PETSC_USE_COMPLEX) 3265 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3266 #else 3267 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3268 #endif 3269 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3270 PetscInt k; 3271 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3272 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3273 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3274 nmin = nmax; 3275 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3276 for (k=0;k<nmax;k++) { 3277 eigs[k] = 1./PETSC_SMALL; 3278 eigv[k*(subset_size+1)] = 1.0; 3279 } 3280 } 3281 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3282 if (B_ierr) { 3283 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3284 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3285 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3286 } 3287 3288 if (B_neigs > nmax) { 3289 if (pcbddc->dbg_flag) { 3290 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3291 } 3292 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3293 B_neigs = nmax; 3294 } 3295 3296 nmin_s = PetscMin(nmin,B_N); 3297 if (B_neigs < nmin_s) { 3298 PetscBLASInt B_neigs2; 3299 3300 if (pcbddc->use_deluxe_scaling) { 3301 if (scal) { 3302 B_IU = nmin_s; 3303 B_IL = B_neigs + 1; 3304 } else { 3305 B_IL = B_N - nmin_s + 1; 3306 B_IU = B_N - B_neigs; 3307 } 3308 } else { 3309 B_IL = B_neigs + 1; 3310 B_IU = nmin_s; 3311 } 3312 if (pcbddc->dbg_flag) { 3313 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3314 } 3315 if (sub_schurs->is_symmetric) { 3316 PetscInt j,k; 3317 for (j=0;j<subset_size;j++) { 3318 for (k=j;k<subset_size;k++) { 3319 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3320 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3321 } 3322 } 3323 } else { 3324 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3325 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3326 } 3327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3328 #if defined(PETSC_USE_COMPLEX) 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3330 #else 3331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3332 #endif 3333 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3334 B_neigs += B_neigs2; 3335 } 3336 if (B_ierr) { 3337 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3338 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3339 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3340 } 3341 if (pcbddc->dbg_flag) { 3342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3343 for (j=0;j<B_neigs;j++) { 3344 if (eigs[j] == 0.0) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3346 } else { 3347 if (pcbddc->use_deluxe_scaling) { 3348 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3349 } else { 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3351 } 3352 } 3353 } 3354 } 3355 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3356 } 3357 /* change the basis back to the original one */ 3358 if (sub_schurs->change) { 3359 Mat change,phi,phit; 3360 3361 if (pcbddc->dbg_flag > 2) { 3362 PetscInt ii; 3363 for (ii=0;ii<B_neigs;ii++) { 3364 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3365 for (j=0;j<B_N;j++) { 3366 #if defined(PETSC_USE_COMPLEX) 3367 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3368 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3369 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3370 #else 3371 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3372 #endif 3373 } 3374 } 3375 } 3376 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3377 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3378 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3379 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3380 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3381 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3382 } 3383 maxneigs = PetscMax(B_neigs,maxneigs); 3384 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3385 if (B_neigs) { 3386 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3387 3388 if (pcbddc->dbg_flag > 1) { 3389 PetscInt ii; 3390 for (ii=0;ii<B_neigs;ii++) { 3391 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3392 for (j=0;j<B_N;j++) { 3393 #if defined(PETSC_USE_COMPLEX) 3394 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3395 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3396 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3397 #else 3398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3399 #endif 3400 } 3401 } 3402 } 3403 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3404 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3405 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3406 cum++; 3407 } 3408 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3409 /* shift for next computation */ 3410 cumarray += subset_size*subset_size; 3411 } 3412 if (pcbddc->dbg_flag) { 3413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3414 } 3415 3416 if (mss) { 3417 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3418 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3419 /* destroy matrices (junk) */ 3420 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3421 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3422 } 3423 if (allocated_S_St) { 3424 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3425 } 3426 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3427 #if defined(PETSC_USE_COMPLEX) 3428 ierr = PetscFree(rwork);CHKERRQ(ierr); 3429 #endif 3430 if (pcbddc->dbg_flag) { 3431 PetscInt maxneigs_r; 3432 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3433 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3434 } 3435 PetscFunctionReturn(0); 3436 } 3437 3438 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3439 { 3440 PetscScalar *coarse_submat_vals; 3441 PetscErrorCode ierr; 3442 3443 PetscFunctionBegin; 3444 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3445 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3446 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3447 3448 /* Setup local neumann solver ksp_R */ 3449 /* PCBDDCSetUpLocalScatters should be called first! */ 3450 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3451 3452 /* 3453 Setup local correction and local part of coarse basis. 3454 Gives back the dense local part of the coarse matrix in column major ordering 3455 */ 3456 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3457 3458 /* Compute total number of coarse nodes and setup coarse solver */ 3459 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3460 3461 /* free */ 3462 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3463 PetscFunctionReturn(0); 3464 } 3465 3466 PetscErrorCode PCBDDCResetCustomization(PC pc) 3467 { 3468 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3469 PetscErrorCode ierr; 3470 3471 PetscFunctionBegin; 3472 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3473 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3474 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3475 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3476 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3477 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3478 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3479 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3480 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3481 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3482 PetscFunctionReturn(0); 3483 } 3484 3485 PetscErrorCode PCBDDCResetTopography(PC pc) 3486 { 3487 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3488 PetscInt i; 3489 PetscErrorCode ierr; 3490 3491 PetscFunctionBegin; 3492 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3493 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3494 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3495 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3496 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3497 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3498 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3499 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3500 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3501 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3502 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3503 for (i=0;i<pcbddc->n_local_subs;i++) { 3504 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3505 } 3506 pcbddc->n_local_subs = 0; 3507 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3508 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3509 pcbddc->graphanalyzed = PETSC_FALSE; 3510 pcbddc->recompute_topography = PETSC_TRUE; 3511 PetscFunctionReturn(0); 3512 } 3513 3514 PetscErrorCode PCBDDCResetSolvers(PC pc) 3515 { 3516 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3517 PetscErrorCode ierr; 3518 3519 PetscFunctionBegin; 3520 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3521 if (pcbddc->coarse_phi_B) { 3522 PetscScalar *array; 3523 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3524 ierr = PetscFree(array);CHKERRQ(ierr); 3525 } 3526 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3527 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3528 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3529 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3530 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3531 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3532 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3533 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3534 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3535 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3536 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3537 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3538 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3539 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3540 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3541 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3542 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3543 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3544 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3545 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3546 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3547 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3548 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3549 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3550 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3551 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3552 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3553 if (pcbddc->benign_zerodiag_subs) { 3554 PetscInt i; 3555 for (i=0;i<pcbddc->benign_n;i++) { 3556 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3557 } 3558 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3559 } 3560 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3561 PetscFunctionReturn(0); 3562 } 3563 3564 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3565 { 3566 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3567 PC_IS *pcis = (PC_IS*)pc->data; 3568 VecType impVecType; 3569 PetscInt n_constraints,n_R,old_size; 3570 PetscErrorCode ierr; 3571 3572 PetscFunctionBegin; 3573 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3574 n_R = pcis->n - pcbddc->n_vertices; 3575 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3576 /* local work vectors (try to avoid unneeded work)*/ 3577 /* R nodes */ 3578 old_size = -1; 3579 if (pcbddc->vec1_R) { 3580 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3581 } 3582 if (n_R != old_size) { 3583 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3584 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3585 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3586 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3587 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3588 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3589 } 3590 /* local primal dofs */ 3591 old_size = -1; 3592 if (pcbddc->vec1_P) { 3593 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3594 } 3595 if (pcbddc->local_primal_size != old_size) { 3596 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3597 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3598 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3599 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3600 } 3601 /* local explicit constraints */ 3602 old_size = -1; 3603 if (pcbddc->vec1_C) { 3604 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3605 } 3606 if (n_constraints && n_constraints != old_size) { 3607 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3608 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3609 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3610 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3611 } 3612 PetscFunctionReturn(0); 3613 } 3614 3615 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3616 { 3617 PetscErrorCode ierr; 3618 /* pointers to pcis and pcbddc */ 3619 PC_IS* pcis = (PC_IS*)pc->data; 3620 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3621 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3622 /* submatrices of local problem */ 3623 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3624 /* submatrices of local coarse problem */ 3625 Mat S_VV,S_CV,S_VC,S_CC; 3626 /* working matrices */ 3627 Mat C_CR; 3628 /* additional working stuff */ 3629 PC pc_R; 3630 Mat F,Brhs = NULL; 3631 Vec dummy_vec; 3632 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3633 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3634 PetscScalar *work; 3635 PetscInt *idx_V_B; 3636 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3637 PetscInt i,n_R,n_D,n_B; 3638 3639 /* some shortcuts to scalars */ 3640 PetscScalar one=1.0,m_one=-1.0; 3641 3642 PetscFunctionBegin; 3643 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3644 3645 /* Set Non-overlapping dimensions */ 3646 n_vertices = pcbddc->n_vertices; 3647 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3648 n_B = pcis->n_B; 3649 n_D = pcis->n - n_B; 3650 n_R = pcis->n - n_vertices; 3651 3652 /* vertices in boundary numbering */ 3653 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3654 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3655 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3656 3657 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3658 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3659 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3660 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3661 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3662 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3663 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3664 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3665 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3666 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3667 3668 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3669 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3670 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3671 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3672 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3673 lda_rhs = n_R; 3674 need_benign_correction = PETSC_FALSE; 3675 if (isLU || isILU || isCHOL) { 3676 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3677 } else if (sub_schurs && sub_schurs->reuse_solver) { 3678 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3679 MatFactorType type; 3680 3681 F = reuse_solver->F; 3682 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3683 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3684 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3685 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3686 } else { 3687 F = NULL; 3688 } 3689 3690 /* determine if we can use a sparse right-hand side */ 3691 sparserhs = PETSC_FALSE; 3692 if (F) { 3693 MatSolverType solver; 3694 3695 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3696 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3697 } 3698 3699 /* allocate workspace */ 3700 n = 0; 3701 if (n_constraints) { 3702 n += lda_rhs*n_constraints; 3703 } 3704 if (n_vertices) { 3705 n = PetscMax(2*lda_rhs*n_vertices,n); 3706 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3707 } 3708 if (!pcbddc->symmetric_primal) { 3709 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3710 } 3711 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3712 3713 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3714 dummy_vec = NULL; 3715 if (need_benign_correction && lda_rhs != n_R && F) { 3716 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3717 } 3718 3719 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3720 if (n_constraints) { 3721 Mat M3,C_B; 3722 IS is_aux; 3723 PetscScalar *array,*array2; 3724 3725 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3726 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3727 3728 /* Extract constraints on R nodes: C_{CR} */ 3729 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3730 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3731 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3732 3733 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3734 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3735 if (!sparserhs) { 3736 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3737 for (i=0;i<n_constraints;i++) { 3738 const PetscScalar *row_cmat_values; 3739 const PetscInt *row_cmat_indices; 3740 PetscInt size_of_constraint,j; 3741 3742 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3743 for (j=0;j<size_of_constraint;j++) { 3744 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3745 } 3746 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3747 } 3748 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3749 } else { 3750 Mat tC_CR; 3751 3752 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3753 if (lda_rhs != n_R) { 3754 PetscScalar *aa; 3755 PetscInt r,*ii,*jj; 3756 PetscBool done; 3757 3758 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3759 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3760 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3761 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3762 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3763 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3764 } else { 3765 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3766 tC_CR = C_CR; 3767 } 3768 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3769 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3770 } 3771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3772 if (F) { 3773 if (need_benign_correction) { 3774 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3775 3776 /* rhs is already zero on interior dofs, no need to change the rhs */ 3777 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3778 } 3779 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3780 if (need_benign_correction) { 3781 PetscScalar *marr; 3782 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3783 3784 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3785 if (lda_rhs != n_R) { 3786 for (i=0;i<n_constraints;i++) { 3787 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3788 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3789 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3790 } 3791 } else { 3792 for (i=0;i<n_constraints;i++) { 3793 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3794 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3795 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3796 } 3797 } 3798 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3799 } 3800 } else { 3801 PetscScalar *marr; 3802 3803 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3804 for (i=0;i<n_constraints;i++) { 3805 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3806 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3807 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3808 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3809 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3810 } 3811 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3812 } 3813 if (sparserhs) { 3814 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3815 } 3816 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3817 if (!pcbddc->switch_static) { 3818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3819 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3820 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3821 for (i=0;i<n_constraints;i++) { 3822 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3823 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3824 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3825 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3826 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3827 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3828 } 3829 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3830 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3831 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3832 } else { 3833 if (lda_rhs != n_R) { 3834 IS dummy; 3835 3836 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3837 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3838 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3839 } else { 3840 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3841 pcbddc->local_auxmat2 = local_auxmat2_R; 3842 } 3843 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3844 } 3845 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3846 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3847 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3848 if (isCHOL) { 3849 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3850 } else { 3851 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3852 } 3853 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3854 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3855 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3856 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3857 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3858 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3859 } 3860 3861 /* Get submatrices from subdomain matrix */ 3862 if (n_vertices) { 3863 IS is_aux; 3864 PetscBool isseqaij; 3865 3866 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3867 IS tis; 3868 3869 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3870 ierr = ISSort(tis);CHKERRQ(ierr); 3871 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3872 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3873 } else { 3874 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3875 } 3876 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3877 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3878 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3879 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3880 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3881 } 3882 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3883 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3884 } 3885 3886 /* Matrix of coarse basis functions (local) */ 3887 if (pcbddc->coarse_phi_B) { 3888 PetscInt on_B,on_primal,on_D=n_D; 3889 if (pcbddc->coarse_phi_D) { 3890 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3891 } 3892 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3893 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3894 PetscScalar *marray; 3895 3896 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3897 ierr = PetscFree(marray);CHKERRQ(ierr); 3898 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3899 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3900 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3901 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3902 } 3903 } 3904 3905 if (!pcbddc->coarse_phi_B) { 3906 PetscScalar *marr; 3907 3908 /* memory size */ 3909 n = n_B*pcbddc->local_primal_size; 3910 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3911 if (!pcbddc->symmetric_primal) n *= 2; 3912 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3913 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3914 marr += n_B*pcbddc->local_primal_size; 3915 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3916 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3917 marr += n_D*pcbddc->local_primal_size; 3918 } 3919 if (!pcbddc->symmetric_primal) { 3920 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3921 marr += n_B*pcbddc->local_primal_size; 3922 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3923 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3924 } 3925 } else { 3926 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3927 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3928 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3929 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3930 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3931 } 3932 } 3933 } 3934 3935 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3936 p0_lidx_I = NULL; 3937 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3938 const PetscInt *idxs; 3939 3940 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3941 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3942 for (i=0;i<pcbddc->benign_n;i++) { 3943 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3944 } 3945 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3946 } 3947 3948 /* vertices */ 3949 if (n_vertices) { 3950 PetscBool restoreavr = PETSC_FALSE; 3951 3952 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3953 3954 if (n_R) { 3955 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3956 PetscBLASInt B_N,B_one = 1; 3957 PetscScalar *x,*y; 3958 3959 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3960 if (need_benign_correction) { 3961 ISLocalToGlobalMapping RtoN; 3962 IS is_p0; 3963 PetscInt *idxs_p0,n; 3964 3965 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3966 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3967 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3968 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3969 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3970 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3971 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3972 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3973 } 3974 3975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3976 if (!sparserhs || need_benign_correction) { 3977 if (lda_rhs == n_R) { 3978 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3979 } else { 3980 PetscScalar *av,*array; 3981 const PetscInt *xadj,*adjncy; 3982 PetscInt n; 3983 PetscBool flg_row; 3984 3985 array = work+lda_rhs*n_vertices; 3986 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3987 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3988 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3989 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3990 for (i=0;i<n;i++) { 3991 PetscInt j; 3992 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3993 } 3994 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3995 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3996 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3997 } 3998 if (need_benign_correction) { 3999 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4000 PetscScalar *marr; 4001 4002 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4003 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4004 4005 | 0 0 0 | (V) 4006 L = | 0 0 -1 | (P-p0) 4007 | 0 0 -1 | (p0) 4008 4009 */ 4010 for (i=0;i<reuse_solver->benign_n;i++) { 4011 const PetscScalar *vals; 4012 const PetscInt *idxs,*idxs_zero; 4013 PetscInt n,j,nz; 4014 4015 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4016 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4017 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4018 for (j=0;j<n;j++) { 4019 PetscScalar val = vals[j]; 4020 PetscInt k,col = idxs[j]; 4021 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4022 } 4023 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4024 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4025 } 4026 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4027 } 4028 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4029 Brhs = A_RV; 4030 } else { 4031 Mat tA_RVT,A_RVT; 4032 4033 if (!pcbddc->symmetric_primal) { 4034 /* A_RV already scaled by -1 */ 4035 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4036 } else { 4037 restoreavr = PETSC_TRUE; 4038 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4039 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4040 A_RVT = A_VR; 4041 } 4042 if (lda_rhs != n_R) { 4043 PetscScalar *aa; 4044 PetscInt r,*ii,*jj; 4045 PetscBool done; 4046 4047 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4048 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4049 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4050 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4051 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4052 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4053 } else { 4054 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4055 tA_RVT = A_RVT; 4056 } 4057 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4058 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4059 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4060 } 4061 if (F) { 4062 /* need to correct the rhs */ 4063 if (need_benign_correction) { 4064 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4065 PetscScalar *marr; 4066 4067 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4068 if (lda_rhs != n_R) { 4069 for (i=0;i<n_vertices;i++) { 4070 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4071 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4072 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4073 } 4074 } else { 4075 for (i=0;i<n_vertices;i++) { 4076 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4077 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4078 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4079 } 4080 } 4081 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4082 } 4083 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4084 if (restoreavr) { 4085 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4086 } 4087 /* need to correct the solution */ 4088 if (need_benign_correction) { 4089 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4090 PetscScalar *marr; 4091 4092 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4093 if (lda_rhs != n_R) { 4094 for (i=0;i<n_vertices;i++) { 4095 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4096 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4097 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4098 } 4099 } else { 4100 for (i=0;i<n_vertices;i++) { 4101 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4102 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4103 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4104 } 4105 } 4106 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4107 } 4108 } else { 4109 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4110 for (i=0;i<n_vertices;i++) { 4111 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4112 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4113 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4114 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4115 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4116 } 4117 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4118 } 4119 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4120 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4121 /* S_VV and S_CV */ 4122 if (n_constraints) { 4123 Mat B; 4124 4125 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4126 for (i=0;i<n_vertices;i++) { 4127 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4128 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4129 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4130 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4131 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4132 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4133 } 4134 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4135 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4136 ierr = MatDestroy(&B);CHKERRQ(ierr); 4137 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4138 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4139 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4140 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4142 ierr = MatDestroy(&B);CHKERRQ(ierr); 4143 } 4144 if (lda_rhs != n_R) { 4145 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4146 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4147 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4148 } 4149 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4150 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4151 if (need_benign_correction) { 4152 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4153 PetscScalar *marr,*sums; 4154 4155 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4156 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4157 for (i=0;i<reuse_solver->benign_n;i++) { 4158 const PetscScalar *vals; 4159 const PetscInt *idxs,*idxs_zero; 4160 PetscInt n,j,nz; 4161 4162 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4163 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4164 for (j=0;j<n_vertices;j++) { 4165 PetscInt k; 4166 sums[j] = 0.; 4167 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4168 } 4169 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4170 for (j=0;j<n;j++) { 4171 PetscScalar val = vals[j]; 4172 PetscInt k; 4173 for (k=0;k<n_vertices;k++) { 4174 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4175 } 4176 } 4177 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4178 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4179 } 4180 ierr = PetscFree(sums);CHKERRQ(ierr); 4181 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4182 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4183 } 4184 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4185 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4186 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4187 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4188 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4189 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4190 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4191 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4192 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4193 } else { 4194 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4195 } 4196 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4197 4198 /* coarse basis functions */ 4199 for (i=0;i<n_vertices;i++) { 4200 PetscScalar *y; 4201 4202 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4203 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4204 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4205 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4206 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4207 y[n_B*i+idx_V_B[i]] = 1.0; 4208 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4209 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4210 4211 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4212 PetscInt j; 4213 4214 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4215 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4216 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4217 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4218 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4219 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4220 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4221 } 4222 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4223 } 4224 /* if n_R == 0 the object is not destroyed */ 4225 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4226 } 4227 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4228 4229 if (n_constraints) { 4230 Mat B; 4231 4232 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4233 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4234 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4235 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4236 if (n_vertices) { 4237 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4238 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4239 } else { 4240 Mat S_VCt; 4241 4242 if (lda_rhs != n_R) { 4243 ierr = MatDestroy(&B);CHKERRQ(ierr); 4244 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4245 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4246 } 4247 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4248 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4249 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4250 } 4251 } 4252 ierr = MatDestroy(&B);CHKERRQ(ierr); 4253 /* coarse basis functions */ 4254 for (i=0;i<n_constraints;i++) { 4255 PetscScalar *y; 4256 4257 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4258 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4259 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4260 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4261 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4262 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4263 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4264 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4265 PetscInt j; 4266 4267 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4268 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4269 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4270 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4271 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4272 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4273 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4274 } 4275 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4276 } 4277 } 4278 if (n_constraints) { 4279 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4280 } 4281 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4282 4283 /* coarse matrix entries relative to B_0 */ 4284 if (pcbddc->benign_n) { 4285 Mat B0_B,B0_BPHI; 4286 IS is_dummy; 4287 PetscScalar *data; 4288 PetscInt j; 4289 4290 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4291 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4292 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4293 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4294 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4295 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4296 for (j=0;j<pcbddc->benign_n;j++) { 4297 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4298 for (i=0;i<pcbddc->local_primal_size;i++) { 4299 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4300 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4301 } 4302 } 4303 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4304 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4305 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4306 } 4307 4308 /* compute other basis functions for non-symmetric problems */ 4309 if (!pcbddc->symmetric_primal) { 4310 Mat B_V=NULL,B_C=NULL; 4311 PetscScalar *marray; 4312 4313 if (n_constraints) { 4314 Mat S_CCT,C_CRT; 4315 4316 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4317 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4318 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4319 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4320 if (n_vertices) { 4321 Mat S_VCT; 4322 4323 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4324 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4325 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4326 } 4327 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4328 } else { 4329 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4330 } 4331 if (n_vertices && n_R) { 4332 PetscScalar *av,*marray; 4333 const PetscInt *xadj,*adjncy; 4334 PetscInt n; 4335 PetscBool flg_row; 4336 4337 /* B_V = B_V - A_VR^T */ 4338 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4339 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4340 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4341 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4342 for (i=0;i<n;i++) { 4343 PetscInt j; 4344 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4345 } 4346 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4347 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4348 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4349 } 4350 4351 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4352 if (n_vertices) { 4353 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4354 for (i=0;i<n_vertices;i++) { 4355 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4356 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4357 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4358 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4359 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4360 } 4361 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4362 } 4363 if (B_C) { 4364 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4365 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4366 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4367 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4368 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4369 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4370 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4371 } 4372 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4373 } 4374 /* coarse basis functions */ 4375 for (i=0;i<pcbddc->local_primal_size;i++) { 4376 PetscScalar *y; 4377 4378 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4379 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4380 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4381 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4382 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4383 if (i<n_vertices) { 4384 y[n_B*i+idx_V_B[i]] = 1.0; 4385 } 4386 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4387 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4388 4389 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4390 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4391 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4392 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4393 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4394 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4395 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4396 } 4397 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4398 } 4399 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4400 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4401 } 4402 4403 /* free memory */ 4404 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4405 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4406 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4407 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4408 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4409 ierr = PetscFree(work);CHKERRQ(ierr); 4410 if (n_vertices) { 4411 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4412 } 4413 if (n_constraints) { 4414 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4415 } 4416 /* Checking coarse_sub_mat and coarse basis functios */ 4417 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4418 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4419 if (pcbddc->dbg_flag) { 4420 Mat coarse_sub_mat; 4421 Mat AUXMAT,TM1,TM2,TM3,TM4; 4422 Mat coarse_phi_D,coarse_phi_B; 4423 Mat coarse_psi_D,coarse_psi_B; 4424 Mat A_II,A_BB,A_IB,A_BI; 4425 Mat C_B,CPHI; 4426 IS is_dummy; 4427 Vec mones; 4428 MatType checkmattype=MATSEQAIJ; 4429 PetscReal real_value; 4430 4431 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4432 Mat A; 4433 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4434 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4435 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4436 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4437 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4438 ierr = MatDestroy(&A);CHKERRQ(ierr); 4439 } else { 4440 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4441 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4442 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4443 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4444 } 4445 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4446 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4447 if (!pcbddc->symmetric_primal) { 4448 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4449 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4450 } 4451 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4452 4453 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4454 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4455 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4456 if (!pcbddc->symmetric_primal) { 4457 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4458 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4459 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4460 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4461 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4462 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4463 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4464 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4465 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4466 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4467 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4468 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4469 } else { 4470 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4471 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4472 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4473 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4474 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4475 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4476 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4477 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4478 } 4479 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4480 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4481 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4482 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4483 if (pcbddc->benign_n) { 4484 Mat B0_B,B0_BPHI; 4485 PetscScalar *data,*data2; 4486 PetscInt j; 4487 4488 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4489 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4490 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4491 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4492 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4493 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4494 for (j=0;j<pcbddc->benign_n;j++) { 4495 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4496 for (i=0;i<pcbddc->local_primal_size;i++) { 4497 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4498 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4499 } 4500 } 4501 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4502 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4503 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4504 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4505 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4506 } 4507 #if 0 4508 { 4509 PetscViewer viewer; 4510 char filename[256]; 4511 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4512 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4513 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4514 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4515 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4516 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4517 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4518 if (pcbddc->coarse_phi_B) { 4519 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4520 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4521 } 4522 if (pcbddc->coarse_phi_D) { 4523 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4524 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4525 } 4526 if (pcbddc->coarse_psi_B) { 4527 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4528 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4529 } 4530 if (pcbddc->coarse_psi_D) { 4531 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4532 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4533 } 4534 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4535 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4536 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4537 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4538 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4539 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4540 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4541 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4542 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4543 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4544 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4545 } 4546 #endif 4547 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4548 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4549 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4550 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4551 4552 /* check constraints */ 4553 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4554 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4555 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4556 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4557 } else { 4558 PetscScalar *data; 4559 Mat tmat; 4560 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4561 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4562 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4563 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4564 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4565 } 4566 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4567 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4568 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4569 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4571 if (!pcbddc->symmetric_primal) { 4572 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4573 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4574 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4575 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4577 } 4578 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4579 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4580 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4581 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4583 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4584 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4585 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4586 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4587 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4588 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4589 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4590 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4591 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4592 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4593 if (!pcbddc->symmetric_primal) { 4594 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4595 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4596 } 4597 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4598 } 4599 /* get back data */ 4600 *coarse_submat_vals_n = coarse_submat_vals; 4601 PetscFunctionReturn(0); 4602 } 4603 4604 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4605 { 4606 Mat *work_mat; 4607 IS isrow_s,iscol_s; 4608 PetscBool rsorted,csorted; 4609 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4610 PetscErrorCode ierr; 4611 4612 PetscFunctionBegin; 4613 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4614 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4615 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4616 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4617 4618 if (!rsorted) { 4619 const PetscInt *idxs; 4620 PetscInt *idxs_sorted,i; 4621 4622 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4623 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4624 for (i=0;i<rsize;i++) { 4625 idxs_perm_r[i] = i; 4626 } 4627 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4628 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4629 for (i=0;i<rsize;i++) { 4630 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4631 } 4632 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4633 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4634 } else { 4635 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4636 isrow_s = isrow; 4637 } 4638 4639 if (!csorted) { 4640 if (isrow == iscol) { 4641 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4642 iscol_s = isrow_s; 4643 } else { 4644 const PetscInt *idxs; 4645 PetscInt *idxs_sorted,i; 4646 4647 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4648 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4649 for (i=0;i<csize;i++) { 4650 idxs_perm_c[i] = i; 4651 } 4652 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4653 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4654 for (i=0;i<csize;i++) { 4655 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4656 } 4657 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4658 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4659 } 4660 } else { 4661 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4662 iscol_s = iscol; 4663 } 4664 4665 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4666 4667 if (!rsorted || !csorted) { 4668 Mat new_mat; 4669 IS is_perm_r,is_perm_c; 4670 4671 if (!rsorted) { 4672 PetscInt *idxs_r,i; 4673 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4674 for (i=0;i<rsize;i++) { 4675 idxs_r[idxs_perm_r[i]] = i; 4676 } 4677 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4678 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4679 } else { 4680 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4681 } 4682 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4683 4684 if (!csorted) { 4685 if (isrow_s == iscol_s) { 4686 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4687 is_perm_c = is_perm_r; 4688 } else { 4689 PetscInt *idxs_c,i; 4690 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4691 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4692 for (i=0;i<csize;i++) { 4693 idxs_c[idxs_perm_c[i]] = i; 4694 } 4695 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4696 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4697 } 4698 } else { 4699 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4700 } 4701 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4702 4703 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4704 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4705 work_mat[0] = new_mat; 4706 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4707 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4708 } 4709 4710 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4711 *B = work_mat[0]; 4712 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4713 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4714 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4715 PetscFunctionReturn(0); 4716 } 4717 4718 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4719 { 4720 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4721 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4722 Mat new_mat,lA; 4723 IS is_local,is_global; 4724 PetscInt local_size; 4725 PetscBool isseqaij; 4726 PetscErrorCode ierr; 4727 4728 PetscFunctionBegin; 4729 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4730 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4731 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4732 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4733 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4734 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4735 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4736 4737 /* check */ 4738 if (pcbddc->dbg_flag) { 4739 Vec x,x_change; 4740 PetscReal error; 4741 4742 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4743 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4744 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4745 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4746 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4747 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4748 if (!pcbddc->change_interior) { 4749 const PetscScalar *x,*y,*v; 4750 PetscReal lerror = 0.; 4751 PetscInt i; 4752 4753 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4754 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4755 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4756 for (i=0;i<local_size;i++) 4757 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4758 lerror = PetscAbsScalar(x[i]-y[i]); 4759 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4760 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4761 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4762 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4763 if (error > PETSC_SMALL) { 4764 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4765 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4766 } else { 4767 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4768 } 4769 } 4770 } 4771 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4772 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4773 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4774 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4775 if (error > PETSC_SMALL) { 4776 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4777 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4778 } else { 4779 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4780 } 4781 } 4782 ierr = VecDestroy(&x);CHKERRQ(ierr); 4783 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4784 } 4785 4786 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4787 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4788 4789 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4790 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4791 if (isseqaij) { 4792 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4793 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4794 if (lA) { 4795 Mat work; 4796 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4797 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4798 ierr = MatDestroy(&work);CHKERRQ(ierr); 4799 } 4800 } else { 4801 Mat work_mat; 4802 4803 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4804 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4805 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4806 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4807 if (lA) { 4808 Mat work; 4809 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4810 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4811 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4812 ierr = MatDestroy(&work);CHKERRQ(ierr); 4813 } 4814 } 4815 if (matis->A->symmetric_set) { 4816 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4817 #if !defined(PETSC_USE_COMPLEX) 4818 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4819 #endif 4820 } 4821 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4822 PetscFunctionReturn(0); 4823 } 4824 4825 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4826 { 4827 PC_IS* pcis = (PC_IS*)(pc->data); 4828 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4829 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4830 PetscInt *idx_R_local=NULL; 4831 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4832 PetscInt vbs,bs; 4833 PetscBT bitmask=NULL; 4834 PetscErrorCode ierr; 4835 4836 PetscFunctionBegin; 4837 /* 4838 No need to setup local scatters if 4839 - primal space is unchanged 4840 AND 4841 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4842 AND 4843 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4844 */ 4845 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4846 PetscFunctionReturn(0); 4847 } 4848 /* destroy old objects */ 4849 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4850 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4851 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4852 /* Set Non-overlapping dimensions */ 4853 n_B = pcis->n_B; 4854 n_D = pcis->n - n_B; 4855 n_vertices = pcbddc->n_vertices; 4856 4857 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4858 4859 /* create auxiliary bitmask and allocate workspace */ 4860 if (!sub_schurs || !sub_schurs->reuse_solver) { 4861 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4862 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4863 for (i=0;i<n_vertices;i++) { 4864 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4865 } 4866 4867 for (i=0, n_R=0; i<pcis->n; i++) { 4868 if (!PetscBTLookup(bitmask,i)) { 4869 idx_R_local[n_R++] = i; 4870 } 4871 } 4872 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4873 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4874 4875 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4876 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4877 } 4878 4879 /* Block code */ 4880 vbs = 1; 4881 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4882 if (bs>1 && !(n_vertices%bs)) { 4883 PetscBool is_blocked = PETSC_TRUE; 4884 PetscInt *vary; 4885 if (!sub_schurs || !sub_schurs->reuse_solver) { 4886 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4887 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4888 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4889 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4890 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4891 for (i=0; i<pcis->n/bs; i++) { 4892 if (vary[i]!=0 && vary[i]!=bs) { 4893 is_blocked = PETSC_FALSE; 4894 break; 4895 } 4896 } 4897 ierr = PetscFree(vary);CHKERRQ(ierr); 4898 } else { 4899 /* Verify directly the R set */ 4900 for (i=0; i<n_R/bs; i++) { 4901 PetscInt j,node=idx_R_local[bs*i]; 4902 for (j=1; j<bs; j++) { 4903 if (node != idx_R_local[bs*i+j]-j) { 4904 is_blocked = PETSC_FALSE; 4905 break; 4906 } 4907 } 4908 } 4909 } 4910 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4911 vbs = bs; 4912 for (i=0;i<n_R/vbs;i++) { 4913 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4914 } 4915 } 4916 } 4917 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4918 if (sub_schurs && sub_schurs->reuse_solver) { 4919 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4920 4921 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4922 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4923 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4924 reuse_solver->is_R = pcbddc->is_R_local; 4925 } else { 4926 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4927 } 4928 4929 /* print some info if requested */ 4930 if (pcbddc->dbg_flag) { 4931 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4932 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4933 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4934 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4935 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4936 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4937 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4938 } 4939 4940 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4941 if (!sub_schurs || !sub_schurs->reuse_solver) { 4942 IS is_aux1,is_aux2; 4943 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4944 4945 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4946 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4947 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4948 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4949 for (i=0; i<n_D; i++) { 4950 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4951 } 4952 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4953 for (i=0, j=0; i<n_R; i++) { 4954 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4955 aux_array1[j++] = i; 4956 } 4957 } 4958 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4959 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4960 for (i=0, j=0; i<n_B; i++) { 4961 if (!PetscBTLookup(bitmask,is_indices[i])) { 4962 aux_array2[j++] = i; 4963 } 4964 } 4965 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4966 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4967 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4968 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4969 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4970 4971 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4972 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4973 for (i=0, j=0; i<n_R; i++) { 4974 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4975 aux_array1[j++] = i; 4976 } 4977 } 4978 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4979 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4980 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4981 } 4982 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4983 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4984 } else { 4985 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4986 IS tis; 4987 PetscInt schur_size; 4988 4989 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4990 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4991 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4992 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4993 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4994 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4995 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4996 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4997 } 4998 } 4999 PetscFunctionReturn(0); 5000 } 5001 5002 5003 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5004 { 5005 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5006 PC_IS *pcis = (PC_IS*)pc->data; 5007 PC pc_temp; 5008 Mat A_RR; 5009 MatReuse reuse; 5010 PetscScalar m_one = -1.0; 5011 PetscReal value; 5012 PetscInt n_D,n_R; 5013 PetscBool check_corr,issbaij; 5014 PetscErrorCode ierr; 5015 /* prefixes stuff */ 5016 char dir_prefix[256],neu_prefix[256],str_level[16]; 5017 size_t len; 5018 5019 PetscFunctionBegin; 5020 5021 /* compute prefixes */ 5022 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5023 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5024 if (!pcbddc->current_level) { 5025 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5026 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5027 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5028 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5029 } else { 5030 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5031 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5032 len -= 15; /* remove "pc_bddc_coarse_" */ 5033 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5034 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5035 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5036 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5037 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5038 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5039 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 5040 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5041 } 5042 5043 /* DIRICHLET PROBLEM */ 5044 if (dirichlet) { 5045 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5046 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5047 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5048 if (pcbddc->dbg_flag) { 5049 Mat A_IIn; 5050 5051 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5052 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5053 pcis->A_II = A_IIn; 5054 } 5055 } 5056 if (pcbddc->local_mat->symmetric_set) { 5057 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5058 } 5059 /* Matrix for Dirichlet problem is pcis->A_II */ 5060 n_D = pcis->n - pcis->n_B; 5061 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5062 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5063 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5064 /* default */ 5065 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5066 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5067 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5068 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5069 if (issbaij) { 5070 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5071 } else { 5072 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5073 } 5074 /* Allow user's customization */ 5075 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5076 } 5077 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5078 if (sub_schurs && sub_schurs->reuse_solver) { 5079 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5080 5081 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5082 } 5083 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5084 if (!n_D) { 5085 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5086 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5087 } 5088 /* Set Up KSP for Dirichlet problem of BDDC */ 5089 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5090 /* set ksp_D into pcis data */ 5091 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5092 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5093 pcis->ksp_D = pcbddc->ksp_D; 5094 } 5095 5096 /* NEUMANN PROBLEM */ 5097 A_RR = 0; 5098 if (neumann) { 5099 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5100 PetscInt ibs,mbs; 5101 PetscBool issbaij, reuse_neumann_solver; 5102 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5103 5104 reuse_neumann_solver = PETSC_FALSE; 5105 if (sub_schurs && sub_schurs->reuse_solver) { 5106 IS iP; 5107 5108 reuse_neumann_solver = PETSC_TRUE; 5109 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5110 if (iP) reuse_neumann_solver = PETSC_FALSE; 5111 } 5112 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5113 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5114 if (pcbddc->ksp_R) { /* already created ksp */ 5115 PetscInt nn_R; 5116 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5117 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5118 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5119 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5120 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5121 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5122 reuse = MAT_INITIAL_MATRIX; 5123 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5124 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5125 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5126 reuse = MAT_INITIAL_MATRIX; 5127 } else { /* safe to reuse the matrix */ 5128 reuse = MAT_REUSE_MATRIX; 5129 } 5130 } 5131 /* last check */ 5132 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5133 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5134 reuse = MAT_INITIAL_MATRIX; 5135 } 5136 } else { /* first time, so we need to create the matrix */ 5137 reuse = MAT_INITIAL_MATRIX; 5138 } 5139 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5140 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5141 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5142 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5143 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5144 if (matis->A == pcbddc->local_mat) { 5145 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5146 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5147 } else { 5148 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5149 } 5150 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5151 if (matis->A == pcbddc->local_mat) { 5152 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5153 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5154 } else { 5155 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5156 } 5157 } 5158 /* extract A_RR */ 5159 if (reuse_neumann_solver) { 5160 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5161 5162 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5163 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5164 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5165 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5166 } else { 5167 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5168 } 5169 } else { 5170 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5171 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5172 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5173 } 5174 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5175 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5176 } 5177 if (pcbddc->local_mat->symmetric_set) { 5178 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5179 } 5180 if (!pcbddc->ksp_R) { /* create object if not present */ 5181 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5182 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5183 /* default */ 5184 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5185 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5186 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5187 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5188 if (issbaij) { 5189 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5190 } else { 5191 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5192 } 5193 /* Allow user's customization */ 5194 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5195 } 5196 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5197 if (!n_R) { 5198 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5199 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5200 } 5201 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5202 /* Reuse solver if it is present */ 5203 if (reuse_neumann_solver) { 5204 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5205 5206 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5207 } 5208 /* Set Up KSP for Neumann problem of BDDC */ 5209 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5210 } 5211 5212 if (pcbddc->dbg_flag) { 5213 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5214 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5215 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5216 } 5217 5218 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5219 check_corr = PETSC_FALSE; 5220 if (pcbddc->NullSpace_corr[0]) { 5221 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5222 } 5223 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5224 check_corr = PETSC_TRUE; 5225 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5226 } 5227 if (neumann && pcbddc->NullSpace_corr[2]) { 5228 check_corr = PETSC_TRUE; 5229 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5230 } 5231 /* check Dirichlet and Neumann solvers */ 5232 if (pcbddc->dbg_flag) { 5233 if (dirichlet) { /* Dirichlet */ 5234 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5235 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5236 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5237 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5238 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5239 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5240 if (check_corr) { 5241 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5242 } 5243 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5244 } 5245 if (neumann) { /* Neumann */ 5246 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5247 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5248 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5249 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5250 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5251 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5252 if (check_corr) { 5253 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5254 } 5255 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5256 } 5257 } 5258 /* free Neumann problem's matrix */ 5259 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5260 PetscFunctionReturn(0); 5261 } 5262 5263 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5264 { 5265 PetscErrorCode ierr; 5266 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5267 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5268 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5269 5270 PetscFunctionBegin; 5271 if (!reuse_solver) { 5272 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5273 } 5274 if (!pcbddc->switch_static) { 5275 if (applytranspose && pcbddc->local_auxmat1) { 5276 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5277 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5278 } 5279 if (!reuse_solver) { 5280 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5281 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5282 } else { 5283 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5284 5285 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5286 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5287 } 5288 } else { 5289 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5290 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5291 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5292 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5293 if (applytranspose && pcbddc->local_auxmat1) { 5294 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5295 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5296 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5297 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5298 } 5299 } 5300 if (!reuse_solver || pcbddc->switch_static) { 5301 if (applytranspose) { 5302 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5303 } else { 5304 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5305 } 5306 } else { 5307 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5308 5309 if (applytranspose) { 5310 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5311 } else { 5312 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5313 } 5314 } 5315 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5316 if (!pcbddc->switch_static) { 5317 if (!reuse_solver) { 5318 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5319 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5320 } else { 5321 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5322 5323 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5324 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5325 } 5326 if (!applytranspose && pcbddc->local_auxmat1) { 5327 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5328 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5329 } 5330 } else { 5331 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5332 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5333 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5334 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5335 if (!applytranspose && pcbddc->local_auxmat1) { 5336 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5337 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5338 } 5339 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5340 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5341 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5342 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5343 } 5344 PetscFunctionReturn(0); 5345 } 5346 5347 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5348 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5349 { 5350 PetscErrorCode ierr; 5351 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5352 PC_IS* pcis = (PC_IS*) (pc->data); 5353 const PetscScalar zero = 0.0; 5354 5355 PetscFunctionBegin; 5356 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5357 if (!pcbddc->benign_apply_coarse_only) { 5358 if (applytranspose) { 5359 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5360 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5361 } else { 5362 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5363 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5364 } 5365 } else { 5366 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5367 } 5368 5369 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5370 if (pcbddc->benign_n) { 5371 PetscScalar *array; 5372 PetscInt j; 5373 5374 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5375 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5376 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5377 } 5378 5379 /* start communications from local primal nodes to rhs of coarse solver */ 5380 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5381 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5382 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5383 5384 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5385 if (pcbddc->coarse_ksp) { 5386 Mat coarse_mat; 5387 Vec rhs,sol; 5388 MatNullSpace nullsp; 5389 PetscBool isbddc = PETSC_FALSE; 5390 5391 if (pcbddc->benign_have_null) { 5392 PC coarse_pc; 5393 5394 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5395 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5396 /* we need to propagate to coarser levels the need for a possible benign correction */ 5397 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5398 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5399 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5400 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5401 } 5402 } 5403 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5404 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5405 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5406 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5407 if (nullsp) { 5408 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5409 } 5410 if (applytranspose) { 5411 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5412 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5413 } else { 5414 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5415 PC coarse_pc; 5416 5417 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5418 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5419 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5420 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5421 } else { 5422 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5423 } 5424 } 5425 /* we don't need the benign correction at coarser levels anymore */ 5426 if (pcbddc->benign_have_null && isbddc) { 5427 PC coarse_pc; 5428 PC_BDDC* coarsepcbddc; 5429 5430 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5431 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5432 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5433 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5434 } 5435 if (nullsp) { 5436 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5437 } 5438 } 5439 5440 /* Local solution on R nodes */ 5441 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5442 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5443 } 5444 /* communications from coarse sol to local primal nodes */ 5445 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5446 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5447 5448 /* Sum contributions from the two levels */ 5449 if (!pcbddc->benign_apply_coarse_only) { 5450 if (applytranspose) { 5451 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5452 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5453 } else { 5454 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5455 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5456 } 5457 /* store p0 */ 5458 if (pcbddc->benign_n) { 5459 PetscScalar *array; 5460 PetscInt j; 5461 5462 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5463 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5464 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5465 } 5466 } else { /* expand the coarse solution */ 5467 if (applytranspose) { 5468 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5469 } else { 5470 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5471 } 5472 } 5473 PetscFunctionReturn(0); 5474 } 5475 5476 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5477 { 5478 PetscErrorCode ierr; 5479 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5480 PetscScalar *array; 5481 Vec from,to; 5482 5483 PetscFunctionBegin; 5484 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5485 from = pcbddc->coarse_vec; 5486 to = pcbddc->vec1_P; 5487 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5488 Vec tvec; 5489 5490 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5491 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5492 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5493 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5494 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5495 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5496 } 5497 } else { /* from local to global -> put data in coarse right hand side */ 5498 from = pcbddc->vec1_P; 5499 to = pcbddc->coarse_vec; 5500 } 5501 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5502 PetscFunctionReturn(0); 5503 } 5504 5505 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5506 { 5507 PetscErrorCode ierr; 5508 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5509 PetscScalar *array; 5510 Vec from,to; 5511 5512 PetscFunctionBegin; 5513 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5514 from = pcbddc->coarse_vec; 5515 to = pcbddc->vec1_P; 5516 } else { /* from local to global -> put data in coarse right hand side */ 5517 from = pcbddc->vec1_P; 5518 to = pcbddc->coarse_vec; 5519 } 5520 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5521 if (smode == SCATTER_FORWARD) { 5522 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5523 Vec tvec; 5524 5525 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5526 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5527 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5528 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5529 } 5530 } else { 5531 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5532 ierr = VecResetArray(from);CHKERRQ(ierr); 5533 } 5534 } 5535 PetscFunctionReturn(0); 5536 } 5537 5538 /* uncomment for testing purposes */ 5539 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5540 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5541 { 5542 PetscErrorCode ierr; 5543 PC_IS* pcis = (PC_IS*)(pc->data); 5544 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5545 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5546 /* one and zero */ 5547 PetscScalar one=1.0,zero=0.0; 5548 /* space to store constraints and their local indices */ 5549 PetscScalar *constraints_data; 5550 PetscInt *constraints_idxs,*constraints_idxs_B; 5551 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5552 PetscInt *constraints_n; 5553 /* iterators */ 5554 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5555 /* BLAS integers */ 5556 PetscBLASInt lwork,lierr; 5557 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5558 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5559 /* reuse */ 5560 PetscInt olocal_primal_size,olocal_primal_size_cc; 5561 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5562 /* change of basis */ 5563 PetscBool qr_needed; 5564 PetscBT change_basis,qr_needed_idx; 5565 /* auxiliary stuff */ 5566 PetscInt *nnz,*is_indices; 5567 PetscInt ncc; 5568 /* some quantities */ 5569 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5570 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5571 PetscReal tol; /* tolerance for retaining eigenmodes */ 5572 5573 PetscFunctionBegin; 5574 tol = PetscSqrtReal(PETSC_SMALL); 5575 /* Destroy Mat objects computed previously */ 5576 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5577 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5578 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5579 /* save info on constraints from previous setup (if any) */ 5580 olocal_primal_size = pcbddc->local_primal_size; 5581 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5582 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5583 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5584 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5585 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5586 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5587 5588 if (!pcbddc->adaptive_selection) { 5589 IS ISForVertices,*ISForFaces,*ISForEdges; 5590 MatNullSpace nearnullsp; 5591 const Vec *nearnullvecs; 5592 Vec *localnearnullsp; 5593 PetscScalar *array; 5594 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5595 PetscBool nnsp_has_cnst; 5596 /* LAPACK working arrays for SVD or POD */ 5597 PetscBool skip_lapack,boolforchange; 5598 PetscScalar *work; 5599 PetscReal *singular_vals; 5600 #if defined(PETSC_USE_COMPLEX) 5601 PetscReal *rwork; 5602 #endif 5603 #if defined(PETSC_MISSING_LAPACK_GESVD) 5604 PetscScalar *temp_basis,*correlation_mat; 5605 #else 5606 PetscBLASInt dummy_int=1; 5607 PetscScalar dummy_scalar=1.; 5608 #endif 5609 5610 /* Get index sets for faces, edges and vertices from graph */ 5611 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5612 /* print some info */ 5613 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5614 PetscInt nv; 5615 5616 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5617 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5618 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5622 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5623 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5624 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5625 } 5626 5627 /* free unneeded index sets */ 5628 if (!pcbddc->use_vertices) { 5629 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5630 } 5631 if (!pcbddc->use_edges) { 5632 for (i=0;i<n_ISForEdges;i++) { 5633 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5634 } 5635 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5636 n_ISForEdges = 0; 5637 } 5638 if (!pcbddc->use_faces) { 5639 for (i=0;i<n_ISForFaces;i++) { 5640 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5641 } 5642 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5643 n_ISForFaces = 0; 5644 } 5645 5646 /* check if near null space is attached to global mat */ 5647 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5648 if (nearnullsp) { 5649 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5650 /* remove any stored info */ 5651 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5652 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5653 /* store information for BDDC solver reuse */ 5654 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5655 pcbddc->onearnullspace = nearnullsp; 5656 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5657 for (i=0;i<nnsp_size;i++) { 5658 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5659 } 5660 } else { /* if near null space is not provided BDDC uses constants by default */ 5661 nnsp_size = 0; 5662 nnsp_has_cnst = PETSC_TRUE; 5663 } 5664 /* get max number of constraints on a single cc */ 5665 max_constraints = nnsp_size; 5666 if (nnsp_has_cnst) max_constraints++; 5667 5668 /* 5669 Evaluate maximum storage size needed by the procedure 5670 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5671 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5672 There can be multiple constraints per connected component 5673 */ 5674 n_vertices = 0; 5675 if (ISForVertices) { 5676 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5677 } 5678 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5679 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5680 5681 total_counts = n_ISForFaces+n_ISForEdges; 5682 total_counts *= max_constraints; 5683 total_counts += n_vertices; 5684 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5685 5686 total_counts = 0; 5687 max_size_of_constraint = 0; 5688 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5689 IS used_is; 5690 if (i<n_ISForEdges) { 5691 used_is = ISForEdges[i]; 5692 } else { 5693 used_is = ISForFaces[i-n_ISForEdges]; 5694 } 5695 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5696 total_counts += j; 5697 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5698 } 5699 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5700 5701 /* get local part of global near null space vectors */ 5702 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5703 for (k=0;k<nnsp_size;k++) { 5704 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5705 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5706 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5707 } 5708 5709 /* whether or not to skip lapack calls */ 5710 skip_lapack = PETSC_TRUE; 5711 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5712 5713 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5714 if (!skip_lapack) { 5715 PetscScalar temp_work; 5716 5717 #if defined(PETSC_MISSING_LAPACK_GESVD) 5718 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5719 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5720 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5721 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5722 #if defined(PETSC_USE_COMPLEX) 5723 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5724 #endif 5725 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5726 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5727 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5728 lwork = -1; 5729 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5730 #if !defined(PETSC_USE_COMPLEX) 5731 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5732 #else 5733 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5734 #endif 5735 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5736 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5737 #else /* on missing GESVD */ 5738 /* SVD */ 5739 PetscInt max_n,min_n; 5740 max_n = max_size_of_constraint; 5741 min_n = max_constraints; 5742 if (max_size_of_constraint < max_constraints) { 5743 min_n = max_size_of_constraint; 5744 max_n = max_constraints; 5745 } 5746 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5747 #if defined(PETSC_USE_COMPLEX) 5748 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5749 #endif 5750 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5751 lwork = -1; 5752 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5753 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5754 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5755 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5756 #if !defined(PETSC_USE_COMPLEX) 5757 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5758 #else 5759 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5760 #endif 5761 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5762 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5763 #endif /* on missing GESVD */ 5764 /* Allocate optimal workspace */ 5765 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5766 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5767 } 5768 /* Now we can loop on constraining sets */ 5769 total_counts = 0; 5770 constraints_idxs_ptr[0] = 0; 5771 constraints_data_ptr[0] = 0; 5772 /* vertices */ 5773 if (n_vertices) { 5774 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5775 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5776 for (i=0;i<n_vertices;i++) { 5777 constraints_n[total_counts] = 1; 5778 constraints_data[total_counts] = 1.0; 5779 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5780 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5781 total_counts++; 5782 } 5783 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5784 n_vertices = total_counts; 5785 } 5786 5787 /* edges and faces */ 5788 total_counts_cc = total_counts; 5789 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5790 IS used_is; 5791 PetscBool idxs_copied = PETSC_FALSE; 5792 5793 if (ncc<n_ISForEdges) { 5794 used_is = ISForEdges[ncc]; 5795 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5796 } else { 5797 used_is = ISForFaces[ncc-n_ISForEdges]; 5798 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5799 } 5800 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5801 5802 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5803 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5804 /* change of basis should not be performed on local periodic nodes */ 5805 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5806 if (nnsp_has_cnst) { 5807 PetscScalar quad_value; 5808 5809 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5810 idxs_copied = PETSC_TRUE; 5811 5812 if (!pcbddc->use_nnsp_true) { 5813 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5814 } else { 5815 quad_value = 1.0; 5816 } 5817 for (j=0;j<size_of_constraint;j++) { 5818 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5819 } 5820 temp_constraints++; 5821 total_counts++; 5822 } 5823 for (k=0;k<nnsp_size;k++) { 5824 PetscReal real_value; 5825 PetscScalar *ptr_to_data; 5826 5827 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5828 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5829 for (j=0;j<size_of_constraint;j++) { 5830 ptr_to_data[j] = array[is_indices[j]]; 5831 } 5832 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5833 /* check if array is null on the connected component */ 5834 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5835 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5836 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 5837 temp_constraints++; 5838 total_counts++; 5839 if (!idxs_copied) { 5840 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5841 idxs_copied = PETSC_TRUE; 5842 } 5843 } 5844 } 5845 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5846 valid_constraints = temp_constraints; 5847 if (!pcbddc->use_nnsp_true && temp_constraints) { 5848 if (temp_constraints == 1) { /* just normalize the constraint */ 5849 PetscScalar norm,*ptr_to_data; 5850 5851 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5853 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5854 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5855 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5856 } else { /* perform SVD */ 5857 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5858 5859 #if defined(PETSC_MISSING_LAPACK_GESVD) 5860 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5861 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5862 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5863 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5864 from that computed using LAPACKgesvd 5865 -> This is due to a different computation of eigenvectors in LAPACKheev 5866 -> The quality of the POD-computed basis will be the same */ 5867 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5868 /* Store upper triangular part of correlation matrix */ 5869 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5870 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5871 for (j=0;j<temp_constraints;j++) { 5872 for (k=0;k<j+1;k++) { 5873 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5874 } 5875 } 5876 /* compute eigenvalues and eigenvectors of correlation matrix */ 5877 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5878 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5879 #if !defined(PETSC_USE_COMPLEX) 5880 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5881 #else 5882 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5883 #endif 5884 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5885 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5886 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5887 j = 0; 5888 while (j < temp_constraints && singular_vals[j] < tol) j++; 5889 total_counts = total_counts-j; 5890 valid_constraints = temp_constraints-j; 5891 /* scale and copy POD basis into used quadrature memory */ 5892 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5893 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5894 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5895 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5896 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5897 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5898 if (j<temp_constraints) { 5899 PetscInt ii; 5900 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5901 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5902 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5903 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5904 for (k=0;k<temp_constraints-j;k++) { 5905 for (ii=0;ii<size_of_constraint;ii++) { 5906 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5907 } 5908 } 5909 } 5910 #else /* on missing GESVD */ 5911 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5912 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5913 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5914 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5915 #if !defined(PETSC_USE_COMPLEX) 5916 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5917 #else 5918 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5919 #endif 5920 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5921 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5922 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5923 k = temp_constraints; 5924 if (k > size_of_constraint) k = size_of_constraint; 5925 j = 0; 5926 while (j < k && singular_vals[k-j-1] < tol) j++; 5927 valid_constraints = k-j; 5928 total_counts = total_counts-temp_constraints+valid_constraints; 5929 #endif /* on missing GESVD */ 5930 } 5931 } 5932 /* update pointers information */ 5933 if (valid_constraints) { 5934 constraints_n[total_counts_cc] = valid_constraints; 5935 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5936 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5937 /* set change_of_basis flag */ 5938 if (boolforchange) { 5939 PetscBTSet(change_basis,total_counts_cc); 5940 } 5941 total_counts_cc++; 5942 } 5943 } 5944 /* free workspace */ 5945 if (!skip_lapack) { 5946 ierr = PetscFree(work);CHKERRQ(ierr); 5947 #if defined(PETSC_USE_COMPLEX) 5948 ierr = PetscFree(rwork);CHKERRQ(ierr); 5949 #endif 5950 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5951 #if defined(PETSC_MISSING_LAPACK_GESVD) 5952 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5953 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5954 #endif 5955 } 5956 for (k=0;k<nnsp_size;k++) { 5957 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5958 } 5959 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5960 /* free index sets of faces, edges and vertices */ 5961 for (i=0;i<n_ISForFaces;i++) { 5962 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5963 } 5964 if (n_ISForFaces) { 5965 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5966 } 5967 for (i=0;i<n_ISForEdges;i++) { 5968 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5969 } 5970 if (n_ISForEdges) { 5971 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5972 } 5973 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5974 } else { 5975 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5976 5977 total_counts = 0; 5978 n_vertices = 0; 5979 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5980 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5981 } 5982 max_constraints = 0; 5983 total_counts_cc = 0; 5984 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5985 total_counts += pcbddc->adaptive_constraints_n[i]; 5986 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5987 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5988 } 5989 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5990 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5991 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5992 constraints_data = pcbddc->adaptive_constraints_data; 5993 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5994 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5995 total_counts_cc = 0; 5996 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5997 if (pcbddc->adaptive_constraints_n[i]) { 5998 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5999 } 6000 } 6001 #if 0 6002 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6003 for (i=0;i<total_counts_cc;i++) { 6004 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6005 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6006 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6007 printf(" %d",constraints_idxs[j]); 6008 } 6009 printf("\n"); 6010 printf("number of cc: %d\n",constraints_n[i]); 6011 } 6012 for (i=0;i<n_vertices;i++) { 6013 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6014 } 6015 for (i=0;i<sub_schurs->n_subs;i++) { 6016 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6017 } 6018 #endif 6019 6020 max_size_of_constraint = 0; 6021 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6022 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6023 /* Change of basis */ 6024 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6025 if (pcbddc->use_change_of_basis) { 6026 for (i=0;i<sub_schurs->n_subs;i++) { 6027 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6028 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6029 } 6030 } 6031 } 6032 } 6033 pcbddc->local_primal_size = total_counts; 6034 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6035 6036 /* map constraints_idxs in boundary numbering */ 6037 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6038 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6039 6040 /* Create constraint matrix */ 6041 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6042 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6043 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6044 6045 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6046 /* determine if a QR strategy is needed for change of basis */ 6047 qr_needed = PETSC_FALSE; 6048 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6049 total_primal_vertices=0; 6050 pcbddc->local_primal_size_cc = 0; 6051 for (i=0;i<total_counts_cc;i++) { 6052 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6053 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6054 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6055 pcbddc->local_primal_size_cc += 1; 6056 } else if (PetscBTLookup(change_basis,i)) { 6057 for (k=0;k<constraints_n[i];k++) { 6058 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6059 } 6060 pcbddc->local_primal_size_cc += constraints_n[i]; 6061 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6062 PetscBTSet(qr_needed_idx,i); 6063 qr_needed = PETSC_TRUE; 6064 } 6065 } else { 6066 pcbddc->local_primal_size_cc += 1; 6067 } 6068 } 6069 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6070 pcbddc->n_vertices = total_primal_vertices; 6071 /* permute indices in order to have a sorted set of vertices */ 6072 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6073 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6074 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6075 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6076 6077 /* nonzero structure of constraint matrix */ 6078 /* and get reference dof for local constraints */ 6079 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6080 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6081 6082 j = total_primal_vertices; 6083 total_counts = total_primal_vertices; 6084 cum = total_primal_vertices; 6085 for (i=n_vertices;i<total_counts_cc;i++) { 6086 if (!PetscBTLookup(change_basis,i)) { 6087 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6088 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6089 cum++; 6090 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6091 for (k=0;k<constraints_n[i];k++) { 6092 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6093 nnz[j+k] = size_of_constraint; 6094 } 6095 j += constraints_n[i]; 6096 } 6097 } 6098 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6099 ierr = PetscFree(nnz);CHKERRQ(ierr); 6100 6101 /* set values in constraint matrix */ 6102 for (i=0;i<total_primal_vertices;i++) { 6103 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6104 } 6105 total_counts = total_primal_vertices; 6106 for (i=n_vertices;i<total_counts_cc;i++) { 6107 if (!PetscBTLookup(change_basis,i)) { 6108 PetscInt *cols; 6109 6110 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6111 cols = constraints_idxs+constraints_idxs_ptr[i]; 6112 for (k=0;k<constraints_n[i];k++) { 6113 PetscInt row = total_counts+k; 6114 PetscScalar *vals; 6115 6116 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6117 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6118 } 6119 total_counts += constraints_n[i]; 6120 } 6121 } 6122 /* assembling */ 6123 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6124 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6125 6126 /* 6127 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6128 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6129 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6130 */ 6131 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6132 if (pcbddc->use_change_of_basis) { 6133 /* dual and primal dofs on a single cc */ 6134 PetscInt dual_dofs,primal_dofs; 6135 /* working stuff for GEQRF */ 6136 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6137 PetscBLASInt lqr_work; 6138 /* working stuff for UNGQR */ 6139 PetscScalar *gqr_work,lgqr_work_t; 6140 PetscBLASInt lgqr_work; 6141 /* working stuff for TRTRS */ 6142 PetscScalar *trs_rhs; 6143 PetscBLASInt Blas_NRHS; 6144 /* pointers for values insertion into change of basis matrix */ 6145 PetscInt *start_rows,*start_cols; 6146 PetscScalar *start_vals; 6147 /* working stuff for values insertion */ 6148 PetscBT is_primal; 6149 PetscInt *aux_primal_numbering_B; 6150 /* matrix sizes */ 6151 PetscInt global_size,local_size; 6152 /* temporary change of basis */ 6153 Mat localChangeOfBasisMatrix; 6154 /* extra space for debugging */ 6155 PetscScalar *dbg_work; 6156 6157 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6158 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6159 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6160 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6161 /* nonzeros for local mat */ 6162 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6163 if (!pcbddc->benign_change || pcbddc->fake_change) { 6164 for (i=0;i<pcis->n;i++) nnz[i]=1; 6165 } else { 6166 const PetscInt *ii; 6167 PetscInt n; 6168 PetscBool flg_row; 6169 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6170 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6171 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6172 } 6173 for (i=n_vertices;i<total_counts_cc;i++) { 6174 if (PetscBTLookup(change_basis,i)) { 6175 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6176 if (PetscBTLookup(qr_needed_idx,i)) { 6177 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6178 } else { 6179 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6180 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6181 } 6182 } 6183 } 6184 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6185 ierr = PetscFree(nnz);CHKERRQ(ierr); 6186 /* Set interior change in the matrix */ 6187 if (!pcbddc->benign_change || pcbddc->fake_change) { 6188 for (i=0;i<pcis->n;i++) { 6189 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6190 } 6191 } else { 6192 const PetscInt *ii,*jj; 6193 PetscScalar *aa; 6194 PetscInt n; 6195 PetscBool flg_row; 6196 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6197 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6198 for (i=0;i<n;i++) { 6199 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6200 } 6201 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6202 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6203 } 6204 6205 if (pcbddc->dbg_flag) { 6206 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6208 } 6209 6210 6211 /* Now we loop on the constraints which need a change of basis */ 6212 /* 6213 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6214 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6215 6216 Basic blocks of change of basis matrix T computed by 6217 6218 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6219 6220 | 1 0 ... 0 s_1/S | 6221 | 0 1 ... 0 s_2/S | 6222 | ... | 6223 | 0 ... 1 s_{n-1}/S | 6224 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6225 6226 with S = \sum_{i=1}^n s_i^2 6227 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6228 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6229 6230 - QR decomposition of constraints otherwise 6231 */ 6232 if (qr_needed) { 6233 /* space to store Q */ 6234 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6235 /* array to store scaling factors for reflectors */ 6236 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6237 /* first we issue queries for optimal work */ 6238 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6239 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6240 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6241 lqr_work = -1; 6242 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6243 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6244 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6245 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6246 lgqr_work = -1; 6247 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6248 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6249 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6250 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6251 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6252 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6253 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6254 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6255 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6256 /* array to store rhs and solution of triangular solver */ 6257 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6258 /* allocating workspace for check */ 6259 if (pcbddc->dbg_flag) { 6260 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6261 } 6262 } 6263 /* array to store whether a node is primal or not */ 6264 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6265 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6266 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6267 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6268 for (i=0;i<total_primal_vertices;i++) { 6269 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6270 } 6271 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6272 6273 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6274 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6275 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6276 if (PetscBTLookup(change_basis,total_counts)) { 6277 /* get constraint info */ 6278 primal_dofs = constraints_n[total_counts]; 6279 dual_dofs = size_of_constraint-primal_dofs; 6280 6281 if (pcbddc->dbg_flag) { 6282 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6283 } 6284 6285 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6286 6287 /* copy quadrature constraints for change of basis check */ 6288 if (pcbddc->dbg_flag) { 6289 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6290 } 6291 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6292 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6293 6294 /* compute QR decomposition of constraints */ 6295 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6296 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6297 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6298 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6299 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6300 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6301 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6302 6303 /* explictly compute R^-T */ 6304 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6305 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6306 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6307 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6308 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6309 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6310 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6311 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6312 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6313 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6314 6315 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6316 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6317 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6318 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6319 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6320 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6321 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6322 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6323 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6324 6325 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6326 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6327 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6329 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6330 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6331 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6332 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6333 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6334 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6335 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6336 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6337 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6338 6339 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6340 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6341 /* insert cols for primal dofs */ 6342 for (j=0;j<primal_dofs;j++) { 6343 start_vals = &qr_basis[j*size_of_constraint]; 6344 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6345 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6346 } 6347 /* insert cols for dual dofs */ 6348 for (j=0,k=0;j<dual_dofs;k++) { 6349 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6350 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6351 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6352 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6353 j++; 6354 } 6355 } 6356 6357 /* check change of basis */ 6358 if (pcbddc->dbg_flag) { 6359 PetscInt ii,jj; 6360 PetscBool valid_qr=PETSC_TRUE; 6361 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6362 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6363 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6364 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6365 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6366 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6367 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6368 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6369 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6370 for (jj=0;jj<size_of_constraint;jj++) { 6371 for (ii=0;ii<primal_dofs;ii++) { 6372 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6373 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6374 } 6375 } 6376 if (!valid_qr) { 6377 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6378 for (jj=0;jj<size_of_constraint;jj++) { 6379 for (ii=0;ii<primal_dofs;ii++) { 6380 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6381 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6382 } 6383 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6384 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6385 } 6386 } 6387 } 6388 } else { 6389 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6390 } 6391 } 6392 } else { /* simple transformation block */ 6393 PetscInt row,col; 6394 PetscScalar val,norm; 6395 6396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6397 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6398 for (j=0;j<size_of_constraint;j++) { 6399 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6400 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6401 if (!PetscBTLookup(is_primal,row_B)) { 6402 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6403 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6404 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6405 } else { 6406 for (k=0;k<size_of_constraint;k++) { 6407 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6408 if (row != col) { 6409 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6410 } else { 6411 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6412 } 6413 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6414 } 6415 } 6416 } 6417 if (pcbddc->dbg_flag) { 6418 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6419 } 6420 } 6421 } else { 6422 if (pcbddc->dbg_flag) { 6423 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6424 } 6425 } 6426 } 6427 6428 /* free workspace */ 6429 if (qr_needed) { 6430 if (pcbddc->dbg_flag) { 6431 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6432 } 6433 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6434 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6435 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6436 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6437 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6438 } 6439 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6440 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6441 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6442 6443 /* assembling of global change of variable */ 6444 if (!pcbddc->fake_change) { 6445 Mat tmat; 6446 PetscInt bs; 6447 6448 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6449 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6450 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6451 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6452 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6453 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6454 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6455 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6456 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6457 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6458 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6459 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6460 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6461 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6462 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6463 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6464 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6465 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6466 6467 /* check */ 6468 if (pcbddc->dbg_flag) { 6469 PetscReal error; 6470 Vec x,x_change; 6471 6472 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6473 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6474 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6475 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6476 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6477 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6478 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6479 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6480 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6481 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6482 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6483 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6484 if (error > PETSC_SMALL) { 6485 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6486 } 6487 ierr = VecDestroy(&x);CHKERRQ(ierr); 6488 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6489 } 6490 /* adapt sub_schurs computed (if any) */ 6491 if (pcbddc->use_deluxe_scaling) { 6492 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6493 6494 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6495 if (sub_schurs && sub_schurs->S_Ej_all) { 6496 Mat S_new,tmat; 6497 IS is_all_N,is_V_Sall = NULL; 6498 6499 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6500 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6501 if (pcbddc->deluxe_zerorows) { 6502 ISLocalToGlobalMapping NtoSall; 6503 IS is_V; 6504 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6505 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6506 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6507 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6508 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6509 } 6510 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6511 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6512 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6513 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6514 if (pcbddc->deluxe_zerorows) { 6515 const PetscScalar *array; 6516 const PetscInt *idxs_V,*idxs_all; 6517 PetscInt i,n_V; 6518 6519 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6520 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6521 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6522 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6523 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6524 for (i=0;i<n_V;i++) { 6525 PetscScalar val; 6526 PetscInt idx; 6527 6528 idx = idxs_V[i]; 6529 val = array[idxs_all[idxs_V[i]]]; 6530 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6531 } 6532 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6533 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6534 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6535 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6536 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6537 } 6538 sub_schurs->S_Ej_all = S_new; 6539 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6540 if (sub_schurs->sum_S_Ej_all) { 6541 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6542 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6543 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6544 if (pcbddc->deluxe_zerorows) { 6545 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6546 } 6547 sub_schurs->sum_S_Ej_all = S_new; 6548 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6549 } 6550 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6551 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6552 } 6553 /* destroy any change of basis context in sub_schurs */ 6554 if (sub_schurs && sub_schurs->change) { 6555 PetscInt i; 6556 6557 for (i=0;i<sub_schurs->n_subs;i++) { 6558 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6559 } 6560 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6561 } 6562 } 6563 if (pcbddc->switch_static) { /* need to save the local change */ 6564 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6565 } else { 6566 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6567 } 6568 /* determine if any process has changed the pressures locally */ 6569 pcbddc->change_interior = pcbddc->benign_have_null; 6570 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6571 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6572 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6573 pcbddc->use_qr_single = qr_needed; 6574 } 6575 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6576 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6577 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6578 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6579 } else { 6580 Mat benign_global = NULL; 6581 if (pcbddc->benign_have_null) { 6582 Mat tmat; 6583 6584 pcbddc->change_interior = PETSC_TRUE; 6585 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6586 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6587 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6588 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6589 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6590 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6591 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6592 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6593 if (pcbddc->benign_change) { 6594 Mat M; 6595 6596 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6597 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6598 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6599 ierr = MatDestroy(&M);CHKERRQ(ierr); 6600 } else { 6601 Mat eye; 6602 PetscScalar *array; 6603 6604 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6605 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6606 for (i=0;i<pcis->n;i++) { 6607 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6608 } 6609 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6610 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6611 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6612 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6613 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6614 } 6615 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6616 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6617 } 6618 if (pcbddc->user_ChangeOfBasisMatrix) { 6619 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6620 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6621 } else if (pcbddc->benign_have_null) { 6622 pcbddc->ChangeOfBasisMatrix = benign_global; 6623 } 6624 } 6625 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6626 IS is_global; 6627 const PetscInt *gidxs; 6628 6629 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6630 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6631 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6632 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6633 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6634 } 6635 } 6636 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6637 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6638 } 6639 6640 if (!pcbddc->fake_change) { 6641 /* add pressure dofs to set of primal nodes for numbering purposes */ 6642 for (i=0;i<pcbddc->benign_n;i++) { 6643 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6644 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6645 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6646 pcbddc->local_primal_size_cc++; 6647 pcbddc->local_primal_size++; 6648 } 6649 6650 /* check if a new primal space has been introduced (also take into account benign trick) */ 6651 pcbddc->new_primal_space_local = PETSC_TRUE; 6652 if (olocal_primal_size == pcbddc->local_primal_size) { 6653 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6654 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6655 if (!pcbddc->new_primal_space_local) { 6656 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6657 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6658 } 6659 } 6660 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6661 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6662 } 6663 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6664 6665 /* flush dbg viewer */ 6666 if (pcbddc->dbg_flag) { 6667 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6668 } 6669 6670 /* free workspace */ 6671 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6672 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6673 if (!pcbddc->adaptive_selection) { 6674 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6675 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6676 } else { 6677 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6678 pcbddc->adaptive_constraints_idxs_ptr, 6679 pcbddc->adaptive_constraints_data_ptr, 6680 pcbddc->adaptive_constraints_idxs, 6681 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6682 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6683 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6684 } 6685 PetscFunctionReturn(0); 6686 } 6687 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6688 6689 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6690 { 6691 ISLocalToGlobalMapping map; 6692 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6693 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6694 PetscInt i,N; 6695 PetscBool rcsr = PETSC_FALSE; 6696 PetscErrorCode ierr; 6697 6698 PetscFunctionBegin; 6699 if (pcbddc->recompute_topography) { 6700 pcbddc->graphanalyzed = PETSC_FALSE; 6701 /* Reset previously computed graph */ 6702 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6703 /* Init local Graph struct */ 6704 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6705 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6706 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6707 6708 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6709 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6710 } 6711 /* Check validity of the csr graph passed in by the user */ 6712 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6713 6714 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6715 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6716 PetscInt *xadj,*adjncy; 6717 PetscInt nvtxs; 6718 PetscBool flg_row=PETSC_FALSE; 6719 6720 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6721 if (flg_row) { 6722 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6723 pcbddc->computed_rowadj = PETSC_TRUE; 6724 } 6725 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6726 rcsr = PETSC_TRUE; 6727 } 6728 if (pcbddc->dbg_flag) { 6729 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6730 } 6731 6732 /* Setup of Graph */ 6733 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6734 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6735 6736 /* attach info on disconnected subdomains if present */ 6737 if (pcbddc->n_local_subs) { 6738 PetscInt *local_subs; 6739 6740 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6741 for (i=0;i<pcbddc->n_local_subs;i++) { 6742 const PetscInt *idxs; 6743 PetscInt nl,j; 6744 6745 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6746 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6747 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6748 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6749 } 6750 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6751 pcbddc->mat_graph->local_subs = local_subs; 6752 } 6753 } 6754 6755 if (!pcbddc->graphanalyzed) { 6756 /* Graph's connected components analysis */ 6757 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6758 pcbddc->graphanalyzed = PETSC_TRUE; 6759 } 6760 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6761 PetscFunctionReturn(0); 6762 } 6763 6764 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6765 { 6766 PetscInt i,j; 6767 PetscScalar *alphas; 6768 PetscErrorCode ierr; 6769 6770 PetscFunctionBegin; 6771 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6772 for (i=0;i<n;i++) { 6773 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6774 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6775 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6776 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6777 } 6778 ierr = PetscFree(alphas);CHKERRQ(ierr); 6779 PetscFunctionReturn(0); 6780 } 6781 6782 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6783 { 6784 Mat A; 6785 PetscInt n_neighs,*neighs,*n_shared,**shared; 6786 PetscMPIInt size,rank,color; 6787 PetscInt *xadj,*adjncy; 6788 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6789 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6790 PetscInt void_procs,*procs_candidates = NULL; 6791 PetscInt xadj_count,*count; 6792 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6793 PetscSubcomm psubcomm; 6794 MPI_Comm subcomm; 6795 PetscErrorCode ierr; 6796 6797 PetscFunctionBegin; 6798 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6799 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6800 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6801 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6802 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6803 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6804 6805 if (have_void) *have_void = PETSC_FALSE; 6806 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6807 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6808 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6809 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6810 im_active = !!n; 6811 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6812 void_procs = size - active_procs; 6813 /* get ranks of of non-active processes in mat communicator */ 6814 if (void_procs) { 6815 PetscInt ncand; 6816 6817 if (have_void) *have_void = PETSC_TRUE; 6818 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6819 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6820 for (i=0,ncand=0;i<size;i++) { 6821 if (!procs_candidates[i]) { 6822 procs_candidates[ncand++] = i; 6823 } 6824 } 6825 /* force n_subdomains to be not greater that the number of non-active processes */ 6826 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6827 } 6828 6829 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6830 number of subdomains requested 1 -> send to master or first candidate in voids */ 6831 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6832 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6833 PetscInt issize,isidx,dest; 6834 if (*n_subdomains == 1) dest = 0; 6835 else dest = rank; 6836 if (im_active) { 6837 issize = 1; 6838 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6839 isidx = procs_candidates[dest]; 6840 } else { 6841 isidx = dest; 6842 } 6843 } else { 6844 issize = 0; 6845 isidx = -1; 6846 } 6847 if (*n_subdomains != 1) *n_subdomains = active_procs; 6848 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6849 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6850 PetscFunctionReturn(0); 6851 } 6852 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6853 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6854 threshold = PetscMax(threshold,2); 6855 6856 /* Get info on mapping */ 6857 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6858 6859 /* build local CSR graph of subdomains' connectivity */ 6860 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6861 xadj[0] = 0; 6862 xadj[1] = PetscMax(n_neighs-1,0); 6863 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6864 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6865 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6866 for (i=1;i<n_neighs;i++) 6867 for (j=0;j<n_shared[i];j++) 6868 count[shared[i][j]] += 1; 6869 6870 xadj_count = 0; 6871 for (i=1;i<n_neighs;i++) { 6872 for (j=0;j<n_shared[i];j++) { 6873 if (count[shared[i][j]] < threshold) { 6874 adjncy[xadj_count] = neighs[i]; 6875 adjncy_wgt[xadj_count] = n_shared[i]; 6876 xadj_count++; 6877 break; 6878 } 6879 } 6880 } 6881 xadj[1] = xadj_count; 6882 ierr = PetscFree(count);CHKERRQ(ierr); 6883 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6884 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6885 6886 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6887 6888 /* Restrict work on active processes only */ 6889 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6890 if (void_procs) { 6891 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6892 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6893 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6894 subcomm = PetscSubcommChild(psubcomm); 6895 } else { 6896 psubcomm = NULL; 6897 subcomm = PetscObjectComm((PetscObject)mat); 6898 } 6899 6900 v_wgt = NULL; 6901 if (!color) { 6902 ierr = PetscFree(xadj);CHKERRQ(ierr); 6903 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6904 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6905 } else { 6906 Mat subdomain_adj; 6907 IS new_ranks,new_ranks_contig; 6908 MatPartitioning partitioner; 6909 PetscInt rstart=0,rend=0; 6910 PetscInt *is_indices,*oldranks; 6911 PetscMPIInt size; 6912 PetscBool aggregate; 6913 6914 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6915 if (void_procs) { 6916 PetscInt prank = rank; 6917 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6918 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6919 for (i=0;i<xadj[1];i++) { 6920 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6921 } 6922 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6923 } else { 6924 oldranks = NULL; 6925 } 6926 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6927 if (aggregate) { /* TODO: all this part could be made more efficient */ 6928 PetscInt lrows,row,ncols,*cols; 6929 PetscMPIInt nrank; 6930 PetscScalar *vals; 6931 6932 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6933 lrows = 0; 6934 if (nrank<redprocs) { 6935 lrows = size/redprocs; 6936 if (nrank<size%redprocs) lrows++; 6937 } 6938 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6939 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6940 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6941 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6942 row = nrank; 6943 ncols = xadj[1]-xadj[0]; 6944 cols = adjncy; 6945 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6946 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6947 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6948 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6949 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6950 ierr = PetscFree(xadj);CHKERRQ(ierr); 6951 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6952 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6953 ierr = PetscFree(vals);CHKERRQ(ierr); 6954 if (use_vwgt) { 6955 Vec v; 6956 const PetscScalar *array; 6957 PetscInt nl; 6958 6959 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6960 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6961 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6962 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6963 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6964 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6965 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6966 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6967 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6968 ierr = VecDestroy(&v);CHKERRQ(ierr); 6969 } 6970 } else { 6971 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6972 if (use_vwgt) { 6973 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6974 v_wgt[0] = n; 6975 } 6976 } 6977 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6978 6979 /* Partition */ 6980 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6981 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6982 if (v_wgt) { 6983 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6984 } 6985 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6986 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6987 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6988 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6989 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6990 6991 /* renumber new_ranks to avoid "holes" in new set of processors */ 6992 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6993 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6994 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6995 if (!aggregate) { 6996 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6997 #if defined(PETSC_USE_DEBUG) 6998 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6999 #endif 7000 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7001 } else if (oldranks) { 7002 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7003 } else { 7004 ranks_send_to_idx[0] = is_indices[0]; 7005 } 7006 } else { 7007 PetscInt idx = 0; 7008 PetscMPIInt tag; 7009 MPI_Request *reqs; 7010 7011 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7012 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7013 for (i=rstart;i<rend;i++) { 7014 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7015 } 7016 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7017 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7018 ierr = PetscFree(reqs);CHKERRQ(ierr); 7019 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7020 #if defined(PETSC_USE_DEBUG) 7021 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7022 #endif 7023 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7024 } else if (oldranks) { 7025 ranks_send_to_idx[0] = oldranks[idx]; 7026 } else { 7027 ranks_send_to_idx[0] = idx; 7028 } 7029 } 7030 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7031 /* clean up */ 7032 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7033 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7034 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7035 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7036 } 7037 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7038 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7039 7040 /* assemble parallel IS for sends */ 7041 i = 1; 7042 if (!color) i=0; 7043 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7044 PetscFunctionReturn(0); 7045 } 7046 7047 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7048 7049 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7050 { 7051 Mat local_mat; 7052 IS is_sends_internal; 7053 PetscInt rows,cols,new_local_rows; 7054 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7055 PetscBool ismatis,isdense,newisdense,destroy_mat; 7056 ISLocalToGlobalMapping l2gmap; 7057 PetscInt* l2gmap_indices; 7058 const PetscInt* is_indices; 7059 MatType new_local_type; 7060 /* buffers */ 7061 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7062 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7063 PetscInt *recv_buffer_idxs_local; 7064 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7065 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7066 /* MPI */ 7067 MPI_Comm comm,comm_n; 7068 PetscSubcomm subcomm; 7069 PetscMPIInt n_sends,n_recvs,commsize; 7070 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7071 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7072 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7073 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7074 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7075 PetscErrorCode ierr; 7076 7077 PetscFunctionBegin; 7078 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7079 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7080 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7081 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7082 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7083 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7084 PetscValidLogicalCollectiveBool(mat,reuse,6); 7085 PetscValidLogicalCollectiveInt(mat,nis,8); 7086 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7087 if (nvecs) { 7088 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7089 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7090 } 7091 /* further checks */ 7092 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7093 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7094 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7095 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7096 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7097 if (reuse && *mat_n) { 7098 PetscInt mrows,mcols,mnrows,mncols; 7099 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7100 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7101 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7102 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7103 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7104 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7105 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7106 } 7107 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7108 PetscValidLogicalCollectiveInt(mat,bs,0); 7109 7110 /* prepare IS for sending if not provided */ 7111 if (!is_sends) { 7112 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7113 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7114 } else { 7115 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7116 is_sends_internal = is_sends; 7117 } 7118 7119 /* get comm */ 7120 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7121 7122 /* compute number of sends */ 7123 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7124 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7125 7126 /* compute number of receives */ 7127 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7128 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7129 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7130 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7131 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7132 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7133 ierr = PetscFree(iflags);CHKERRQ(ierr); 7134 7135 /* restrict comm if requested */ 7136 subcomm = 0; 7137 destroy_mat = PETSC_FALSE; 7138 if (restrict_comm) { 7139 PetscMPIInt color,subcommsize; 7140 7141 color = 0; 7142 if (restrict_full) { 7143 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7144 } else { 7145 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7146 } 7147 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7148 subcommsize = commsize - subcommsize; 7149 /* check if reuse has been requested */ 7150 if (reuse) { 7151 if (*mat_n) { 7152 PetscMPIInt subcommsize2; 7153 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7154 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7155 comm_n = PetscObjectComm((PetscObject)*mat_n); 7156 } else { 7157 comm_n = PETSC_COMM_SELF; 7158 } 7159 } else { /* MAT_INITIAL_MATRIX */ 7160 PetscMPIInt rank; 7161 7162 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7163 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7164 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7165 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7166 comm_n = PetscSubcommChild(subcomm); 7167 } 7168 /* flag to destroy *mat_n if not significative */ 7169 if (color) destroy_mat = PETSC_TRUE; 7170 } else { 7171 comm_n = comm; 7172 } 7173 7174 /* prepare send/receive buffers */ 7175 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7176 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7177 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7178 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7179 if (nis) { 7180 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7181 } 7182 7183 /* Get data from local matrices */ 7184 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7185 /* TODO: See below some guidelines on how to prepare the local buffers */ 7186 /* 7187 send_buffer_vals should contain the raw values of the local matrix 7188 send_buffer_idxs should contain: 7189 - MatType_PRIVATE type 7190 - PetscInt size_of_l2gmap 7191 - PetscInt global_row_indices[size_of_l2gmap] 7192 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7193 */ 7194 else { 7195 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7196 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7197 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7198 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7199 send_buffer_idxs[1] = i; 7200 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7201 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7202 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7203 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7204 for (i=0;i<n_sends;i++) { 7205 ilengths_vals[is_indices[i]] = len*len; 7206 ilengths_idxs[is_indices[i]] = len+2; 7207 } 7208 } 7209 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7210 /* additional is (if any) */ 7211 if (nis) { 7212 PetscMPIInt psum; 7213 PetscInt j; 7214 for (j=0,psum=0;j<nis;j++) { 7215 PetscInt plen; 7216 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7217 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7218 psum += len+1; /* indices + lenght */ 7219 } 7220 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7221 for (j=0,psum=0;j<nis;j++) { 7222 PetscInt plen; 7223 const PetscInt *is_array_idxs; 7224 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7225 send_buffer_idxs_is[psum] = plen; 7226 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7227 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7228 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7229 psum += plen+1; /* indices + lenght */ 7230 } 7231 for (i=0;i<n_sends;i++) { 7232 ilengths_idxs_is[is_indices[i]] = psum; 7233 } 7234 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7235 } 7236 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7237 7238 buf_size_idxs = 0; 7239 buf_size_vals = 0; 7240 buf_size_idxs_is = 0; 7241 buf_size_vecs = 0; 7242 for (i=0;i<n_recvs;i++) { 7243 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7244 buf_size_vals += (PetscInt)olengths_vals[i]; 7245 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7246 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7247 } 7248 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7249 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7250 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7251 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7252 7253 /* get new tags for clean communications */ 7254 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7255 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7256 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7257 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7258 7259 /* allocate for requests */ 7260 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7261 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7262 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7263 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7264 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7265 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7266 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7267 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7268 7269 /* communications */ 7270 ptr_idxs = recv_buffer_idxs; 7271 ptr_vals = recv_buffer_vals; 7272 ptr_idxs_is = recv_buffer_idxs_is; 7273 ptr_vecs = recv_buffer_vecs; 7274 for (i=0;i<n_recvs;i++) { 7275 source_dest = onodes[i]; 7276 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7277 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7278 ptr_idxs += olengths_idxs[i]; 7279 ptr_vals += olengths_vals[i]; 7280 if (nis) { 7281 source_dest = onodes_is[i]; 7282 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7283 ptr_idxs_is += olengths_idxs_is[i]; 7284 } 7285 if (nvecs) { 7286 source_dest = onodes[i]; 7287 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7288 ptr_vecs += olengths_idxs[i]-2; 7289 } 7290 } 7291 for (i=0;i<n_sends;i++) { 7292 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7293 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7294 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7295 if (nis) { 7296 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7297 } 7298 if (nvecs) { 7299 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7300 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7301 } 7302 } 7303 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7304 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7305 7306 /* assemble new l2g map */ 7307 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7308 ptr_idxs = recv_buffer_idxs; 7309 new_local_rows = 0; 7310 for (i=0;i<n_recvs;i++) { 7311 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7312 ptr_idxs += olengths_idxs[i]; 7313 } 7314 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7315 ptr_idxs = recv_buffer_idxs; 7316 new_local_rows = 0; 7317 for (i=0;i<n_recvs;i++) { 7318 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7319 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7320 ptr_idxs += olengths_idxs[i]; 7321 } 7322 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7323 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7324 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7325 7326 /* infer new local matrix type from received local matrices type */ 7327 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7328 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7329 if (n_recvs) { 7330 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7331 ptr_idxs = recv_buffer_idxs; 7332 for (i=0;i<n_recvs;i++) { 7333 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7334 new_local_type_private = MATAIJ_PRIVATE; 7335 break; 7336 } 7337 ptr_idxs += olengths_idxs[i]; 7338 } 7339 switch (new_local_type_private) { 7340 case MATDENSE_PRIVATE: 7341 new_local_type = MATSEQAIJ; 7342 bs = 1; 7343 break; 7344 case MATAIJ_PRIVATE: 7345 new_local_type = MATSEQAIJ; 7346 bs = 1; 7347 break; 7348 case MATBAIJ_PRIVATE: 7349 new_local_type = MATSEQBAIJ; 7350 break; 7351 case MATSBAIJ_PRIVATE: 7352 new_local_type = MATSEQSBAIJ; 7353 break; 7354 default: 7355 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7356 break; 7357 } 7358 } else { /* by default, new_local_type is seqaij */ 7359 new_local_type = MATSEQAIJ; 7360 bs = 1; 7361 } 7362 7363 /* create MATIS object if needed */ 7364 if (!reuse) { 7365 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7366 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7367 } else { 7368 /* it also destroys the local matrices */ 7369 if (*mat_n) { 7370 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7371 } else { /* this is a fake object */ 7372 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7373 } 7374 } 7375 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7376 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7377 7378 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7379 7380 /* Global to local map of received indices */ 7381 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7382 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7383 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7384 7385 /* restore attributes -> type of incoming data and its size */ 7386 buf_size_idxs = 0; 7387 for (i=0;i<n_recvs;i++) { 7388 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7389 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7390 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7391 } 7392 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7393 7394 /* set preallocation */ 7395 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7396 if (!newisdense) { 7397 PetscInt *new_local_nnz=0; 7398 7399 ptr_idxs = recv_buffer_idxs_local; 7400 if (n_recvs) { 7401 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7402 } 7403 for (i=0;i<n_recvs;i++) { 7404 PetscInt j; 7405 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7406 for (j=0;j<*(ptr_idxs+1);j++) { 7407 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7408 } 7409 } else { 7410 /* TODO */ 7411 } 7412 ptr_idxs += olengths_idxs[i]; 7413 } 7414 if (new_local_nnz) { 7415 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7416 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7417 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7418 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7419 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7420 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7421 } else { 7422 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7423 } 7424 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7425 } else { 7426 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7427 } 7428 7429 /* set values */ 7430 ptr_vals = recv_buffer_vals; 7431 ptr_idxs = recv_buffer_idxs_local; 7432 for (i=0;i<n_recvs;i++) { 7433 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7434 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7435 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7436 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7437 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7438 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7439 } else { 7440 /* TODO */ 7441 } 7442 ptr_idxs += olengths_idxs[i]; 7443 ptr_vals += olengths_vals[i]; 7444 } 7445 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7446 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7447 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7448 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7449 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7450 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7451 7452 #if 0 7453 if (!restrict_comm) { /* check */ 7454 Vec lvec,rvec; 7455 PetscReal infty_error; 7456 7457 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7458 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7459 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7460 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7461 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7462 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7463 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7464 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7465 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7466 } 7467 #endif 7468 7469 /* assemble new additional is (if any) */ 7470 if (nis) { 7471 PetscInt **temp_idxs,*count_is,j,psum; 7472 7473 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7474 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7475 ptr_idxs = recv_buffer_idxs_is; 7476 psum = 0; 7477 for (i=0;i<n_recvs;i++) { 7478 for (j=0;j<nis;j++) { 7479 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7480 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7481 psum += plen; 7482 ptr_idxs += plen+1; /* shift pointer to received data */ 7483 } 7484 } 7485 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7486 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7487 for (i=1;i<nis;i++) { 7488 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7489 } 7490 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7491 ptr_idxs = recv_buffer_idxs_is; 7492 for (i=0;i<n_recvs;i++) { 7493 for (j=0;j<nis;j++) { 7494 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7495 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7496 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7497 ptr_idxs += plen+1; /* shift pointer to received data */ 7498 } 7499 } 7500 for (i=0;i<nis;i++) { 7501 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7502 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7503 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7504 } 7505 ierr = PetscFree(count_is);CHKERRQ(ierr); 7506 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7507 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7508 } 7509 /* free workspace */ 7510 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7511 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7512 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7513 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7514 if (isdense) { 7515 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7516 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7517 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7518 } else { 7519 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7520 } 7521 if (nis) { 7522 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7523 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7524 } 7525 7526 if (nvecs) { 7527 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7528 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7529 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7530 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7531 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7532 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7533 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7534 /* set values */ 7535 ptr_vals = recv_buffer_vecs; 7536 ptr_idxs = recv_buffer_idxs_local; 7537 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7538 for (i=0;i<n_recvs;i++) { 7539 PetscInt j; 7540 for (j=0;j<*(ptr_idxs+1);j++) { 7541 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7542 } 7543 ptr_idxs += olengths_idxs[i]; 7544 ptr_vals += olengths_idxs[i]-2; 7545 } 7546 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7547 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7548 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7549 } 7550 7551 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7552 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7553 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7554 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7555 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7556 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7557 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7558 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7559 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7560 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7561 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7562 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7563 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7564 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7565 ierr = PetscFree(onodes);CHKERRQ(ierr); 7566 if (nis) { 7567 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7568 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7569 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7570 } 7571 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7572 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7573 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7574 for (i=0;i<nis;i++) { 7575 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7576 } 7577 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7578 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7579 } 7580 *mat_n = NULL; 7581 } 7582 PetscFunctionReturn(0); 7583 } 7584 7585 /* temporary hack into ksp private data structure */ 7586 #include <petsc/private/kspimpl.h> 7587 7588 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7589 { 7590 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7591 PC_IS *pcis = (PC_IS*)pc->data; 7592 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7593 Mat coarsedivudotp = NULL; 7594 Mat coarseG,t_coarse_mat_is; 7595 MatNullSpace CoarseNullSpace = NULL; 7596 ISLocalToGlobalMapping coarse_islg; 7597 IS coarse_is,*isarray; 7598 PetscInt i,im_active=-1,active_procs=-1; 7599 PetscInt nis,nisdofs,nisneu,nisvert; 7600 PC pc_temp; 7601 PCType coarse_pc_type; 7602 KSPType coarse_ksp_type; 7603 PetscBool multilevel_requested,multilevel_allowed; 7604 PetscBool coarse_reuse; 7605 PetscInt ncoarse,nedcfield; 7606 PetscBool compute_vecs = PETSC_FALSE; 7607 PetscScalar *array; 7608 MatReuse coarse_mat_reuse; 7609 PetscBool restr, full_restr, have_void; 7610 PetscMPIInt commsize; 7611 PetscErrorCode ierr; 7612 7613 PetscFunctionBegin; 7614 /* Assign global numbering to coarse dofs */ 7615 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7616 PetscInt ocoarse_size; 7617 compute_vecs = PETSC_TRUE; 7618 7619 pcbddc->new_primal_space = PETSC_TRUE; 7620 ocoarse_size = pcbddc->coarse_size; 7621 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7622 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7623 /* see if we can avoid some work */ 7624 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7625 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7626 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7627 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7628 coarse_reuse = PETSC_FALSE; 7629 } else { /* we can safely reuse already computed coarse matrix */ 7630 coarse_reuse = PETSC_TRUE; 7631 } 7632 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7633 coarse_reuse = PETSC_FALSE; 7634 } 7635 /* reset any subassembling information */ 7636 if (!coarse_reuse || pcbddc->recompute_topography) { 7637 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7638 } 7639 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7640 coarse_reuse = PETSC_TRUE; 7641 } 7642 /* assemble coarse matrix */ 7643 if (coarse_reuse && pcbddc->coarse_ksp) { 7644 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7645 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7646 coarse_mat_reuse = MAT_REUSE_MATRIX; 7647 } else { 7648 coarse_mat = NULL; 7649 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7650 } 7651 7652 /* creates temporary l2gmap and IS for coarse indexes */ 7653 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7654 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7655 7656 /* creates temporary MATIS object for coarse matrix */ 7657 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7658 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7659 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7660 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7661 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7662 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7663 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7664 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7665 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7666 7667 /* count "active" (i.e. with positive local size) and "void" processes */ 7668 im_active = !!(pcis->n); 7669 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7670 7671 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7672 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7673 /* full_restr : just use the receivers from the subassembling pattern */ 7674 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7675 coarse_mat_is = NULL; 7676 multilevel_allowed = PETSC_FALSE; 7677 multilevel_requested = PETSC_FALSE; 7678 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7679 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7680 if (multilevel_requested) { 7681 ncoarse = active_procs/pcbddc->coarsening_ratio; 7682 restr = PETSC_FALSE; 7683 full_restr = PETSC_FALSE; 7684 } else { 7685 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7686 restr = PETSC_TRUE; 7687 full_restr = PETSC_TRUE; 7688 } 7689 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7690 ncoarse = PetscMax(1,ncoarse); 7691 if (!pcbddc->coarse_subassembling) { 7692 if (pcbddc->coarsening_ratio > 1) { 7693 if (multilevel_requested) { 7694 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7695 } else { 7696 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7697 } 7698 } else { 7699 PetscMPIInt rank; 7700 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7701 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7702 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7703 } 7704 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7705 PetscInt psum; 7706 if (pcbddc->coarse_ksp) psum = 1; 7707 else psum = 0; 7708 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7709 if (ncoarse < commsize) have_void = PETSC_TRUE; 7710 } 7711 /* determine if we can go multilevel */ 7712 if (multilevel_requested) { 7713 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7714 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7715 } 7716 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7717 7718 /* dump subassembling pattern */ 7719 if (pcbddc->dbg_flag && multilevel_allowed) { 7720 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7721 } 7722 7723 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7724 nedcfield = -1; 7725 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7726 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7727 const PetscInt *idxs; 7728 ISLocalToGlobalMapping tmap; 7729 7730 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7731 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7732 /* allocate space for temporary storage */ 7733 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7734 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7735 /* allocate for IS array */ 7736 nisdofs = pcbddc->n_ISForDofsLocal; 7737 if (pcbddc->nedclocal) { 7738 if (pcbddc->nedfield > -1) { 7739 nedcfield = pcbddc->nedfield; 7740 } else { 7741 nedcfield = 0; 7742 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7743 nisdofs = 1; 7744 } 7745 } 7746 nisneu = !!pcbddc->NeumannBoundariesLocal; 7747 nisvert = 0; /* nisvert is not used */ 7748 nis = nisdofs + nisneu + nisvert; 7749 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7750 /* dofs splitting */ 7751 for (i=0;i<nisdofs;i++) { 7752 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7753 if (nedcfield != i) { 7754 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7755 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7756 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7757 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7758 } else { 7759 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7760 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7761 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7762 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7763 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7764 } 7765 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7766 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7767 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7768 } 7769 /* neumann boundaries */ 7770 if (pcbddc->NeumannBoundariesLocal) { 7771 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7772 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7773 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7774 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7775 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7776 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7777 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7778 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7779 } 7780 /* free memory */ 7781 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7782 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7783 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7784 } else { 7785 nis = 0; 7786 nisdofs = 0; 7787 nisneu = 0; 7788 nisvert = 0; 7789 isarray = NULL; 7790 } 7791 /* destroy no longer needed map */ 7792 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7793 7794 /* subassemble */ 7795 if (multilevel_allowed) { 7796 Vec vp[1]; 7797 PetscInt nvecs = 0; 7798 PetscBool reuse,reuser; 7799 7800 if (coarse_mat) reuse = PETSC_TRUE; 7801 else reuse = PETSC_FALSE; 7802 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7803 vp[0] = NULL; 7804 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7805 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7806 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7807 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7808 nvecs = 1; 7809 7810 if (pcbddc->divudotp) { 7811 Mat B,loc_divudotp; 7812 Vec v,p; 7813 IS dummy; 7814 PetscInt np; 7815 7816 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7817 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7818 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7819 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7820 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7821 ierr = VecSet(p,1.);CHKERRQ(ierr); 7822 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7823 ierr = VecDestroy(&p);CHKERRQ(ierr); 7824 ierr = MatDestroy(&B);CHKERRQ(ierr); 7825 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7826 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7827 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7828 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7829 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7830 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7831 ierr = VecDestroy(&v);CHKERRQ(ierr); 7832 } 7833 } 7834 if (reuser) { 7835 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7836 } else { 7837 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7838 } 7839 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7840 PetscScalar *arraym,*arrayv; 7841 PetscInt nl; 7842 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7843 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7844 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7845 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7846 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7847 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7848 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7849 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7850 } else { 7851 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7852 } 7853 } else { 7854 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7855 } 7856 if (coarse_mat_is || coarse_mat) { 7857 PetscMPIInt size; 7858 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7859 if (!multilevel_allowed) { 7860 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7861 } else { 7862 Mat A; 7863 7864 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7865 if (coarse_mat_is) { 7866 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7867 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7868 coarse_mat = coarse_mat_is; 7869 } 7870 /* be sure we don't have MatSeqDENSE as local mat */ 7871 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7872 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7873 } 7874 } 7875 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7876 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7877 7878 /* create local to global scatters for coarse problem */ 7879 if (compute_vecs) { 7880 PetscInt lrows; 7881 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7882 if (coarse_mat) { 7883 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7884 } else { 7885 lrows = 0; 7886 } 7887 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7888 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7889 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7890 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7891 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7892 } 7893 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7894 7895 /* set defaults for coarse KSP and PC */ 7896 if (multilevel_allowed) { 7897 coarse_ksp_type = KSPRICHARDSON; 7898 coarse_pc_type = PCBDDC; 7899 } else { 7900 coarse_ksp_type = KSPPREONLY; 7901 coarse_pc_type = PCREDUNDANT; 7902 } 7903 7904 /* print some info if requested */ 7905 if (pcbddc->dbg_flag) { 7906 if (!multilevel_allowed) { 7907 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7908 if (multilevel_requested) { 7909 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7910 } else if (pcbddc->max_levels) { 7911 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7912 } 7913 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7914 } 7915 } 7916 7917 /* communicate coarse discrete gradient */ 7918 coarseG = NULL; 7919 if (pcbddc->nedcG && multilevel_allowed) { 7920 MPI_Comm ccomm; 7921 if (coarse_mat) { 7922 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7923 } else { 7924 ccomm = MPI_COMM_NULL; 7925 } 7926 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7927 } 7928 7929 /* create the coarse KSP object only once with defaults */ 7930 if (coarse_mat) { 7931 PetscBool isredundant,isnn,isbddc; 7932 PetscViewer dbg_viewer = NULL; 7933 7934 if (pcbddc->dbg_flag) { 7935 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7936 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7937 } 7938 if (!pcbddc->coarse_ksp) { 7939 char prefix[256],str_level[16]; 7940 size_t len; 7941 7942 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7943 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7944 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7945 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7946 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7947 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7948 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7949 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7950 /* TODO is this logic correct? should check for coarse_mat type */ 7951 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7952 /* prefix */ 7953 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7954 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7955 if (!pcbddc->current_level) { 7956 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7957 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7958 } else { 7959 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7960 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7961 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7962 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7963 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7964 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7965 } 7966 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7967 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7968 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7969 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7970 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7971 /* allow user customization */ 7972 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7973 } 7974 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7975 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7976 if (nisdofs) { 7977 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7978 for (i=0;i<nisdofs;i++) { 7979 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7980 } 7981 } 7982 if (nisneu) { 7983 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7984 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7985 } 7986 if (nisvert) { 7987 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7988 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7989 } 7990 if (coarseG) { 7991 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7992 } 7993 7994 /* get some info after set from options */ 7995 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7996 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7997 if (isbddc && !multilevel_allowed) { 7998 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7999 isbddc = PETSC_FALSE; 8000 } 8001 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8002 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8003 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8004 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8005 isbddc = PETSC_TRUE; 8006 } 8007 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8008 if (isredundant) { 8009 KSP inner_ksp; 8010 PC inner_pc; 8011 8012 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8013 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8014 } 8015 8016 /* parameters which miss an API */ 8017 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8018 if (isbddc) { 8019 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8020 8021 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8022 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8023 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8024 if (pcbddc_coarse->benign_saddle_point) { 8025 Mat coarsedivudotp_is; 8026 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8027 IS row,col; 8028 const PetscInt *gidxs; 8029 PetscInt n,st,M,N; 8030 8031 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8032 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8033 st = st-n; 8034 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8035 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8036 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8037 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8038 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8039 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8040 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8041 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8042 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8043 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8044 ierr = ISDestroy(&row);CHKERRQ(ierr); 8045 ierr = ISDestroy(&col);CHKERRQ(ierr); 8046 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8047 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8048 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8049 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8050 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8051 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8052 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8053 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8054 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8055 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8056 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8057 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8058 } 8059 } 8060 8061 /* propagate symmetry info of coarse matrix */ 8062 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8063 if (pc->pmat->symmetric_set) { 8064 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8065 } 8066 if (pc->pmat->hermitian_set) { 8067 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8068 } 8069 if (pc->pmat->spd_set) { 8070 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8071 } 8072 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8073 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8074 } 8075 /* set operators */ 8076 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8077 if (pcbddc->dbg_flag) { 8078 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8079 } 8080 } 8081 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8082 ierr = PetscFree(isarray);CHKERRQ(ierr); 8083 #if 0 8084 { 8085 PetscViewer viewer; 8086 char filename[256]; 8087 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8088 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8089 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8090 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8091 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8092 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8093 } 8094 #endif 8095 8096 if (pcbddc->coarse_ksp) { 8097 Vec crhs,csol; 8098 8099 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8100 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8101 if (!csol) { 8102 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8103 } 8104 if (!crhs) { 8105 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8106 } 8107 } 8108 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8109 8110 /* compute null space for coarse solver if the benign trick has been requested */ 8111 if (pcbddc->benign_null) { 8112 8113 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8114 for (i=0;i<pcbddc->benign_n;i++) { 8115 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8116 } 8117 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8118 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8119 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8120 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8121 if (coarse_mat) { 8122 Vec nullv; 8123 PetscScalar *array,*array2; 8124 PetscInt nl; 8125 8126 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8127 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8128 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8129 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8130 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8131 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8132 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8133 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8134 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8135 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8136 } 8137 } 8138 8139 if (pcbddc->coarse_ksp) { 8140 PetscBool ispreonly; 8141 8142 if (CoarseNullSpace) { 8143 PetscBool isnull; 8144 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8145 if (isnull) { 8146 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8147 } 8148 /* TODO: add local nullspaces (if any) */ 8149 } 8150 /* setup coarse ksp */ 8151 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8152 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8153 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8154 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8155 KSP check_ksp; 8156 KSPType check_ksp_type; 8157 PC check_pc; 8158 Vec check_vec,coarse_vec; 8159 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8160 PetscInt its; 8161 PetscBool compute_eigs; 8162 PetscReal *eigs_r,*eigs_c; 8163 PetscInt neigs; 8164 const char *prefix; 8165 8166 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8167 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8168 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8169 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8170 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8171 /* prevent from setup unneeded object */ 8172 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8173 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8174 if (ispreonly) { 8175 check_ksp_type = KSPPREONLY; 8176 compute_eigs = PETSC_FALSE; 8177 } else { 8178 check_ksp_type = KSPGMRES; 8179 compute_eigs = PETSC_TRUE; 8180 } 8181 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8182 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8183 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8184 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8185 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8186 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8187 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8188 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8189 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8190 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8191 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8192 /* create random vec */ 8193 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8194 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8195 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8196 /* solve coarse problem */ 8197 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8198 /* set eigenvalue estimation if preonly has not been requested */ 8199 if (compute_eigs) { 8200 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8201 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8202 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8203 if (neigs) { 8204 lambda_max = eigs_r[neigs-1]; 8205 lambda_min = eigs_r[0]; 8206 if (pcbddc->use_coarse_estimates) { 8207 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8208 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8209 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8210 } 8211 } 8212 } 8213 } 8214 8215 /* check coarse problem residual error */ 8216 if (pcbddc->dbg_flag) { 8217 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8218 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8219 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8220 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8221 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8222 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8223 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8224 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8225 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8226 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8227 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8228 if (CoarseNullSpace) { 8229 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8230 } 8231 if (compute_eigs) { 8232 PetscReal lambda_max_s,lambda_min_s; 8233 KSPConvergedReason reason; 8234 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8235 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8236 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8237 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8238 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8239 for (i=0;i<neigs;i++) { 8240 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8241 } 8242 } 8243 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8244 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8245 } 8246 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8247 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8248 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8249 if (compute_eigs) { 8250 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8251 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8252 } 8253 } 8254 } 8255 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8256 /* print additional info */ 8257 if (pcbddc->dbg_flag) { 8258 /* waits until all processes reaches this point */ 8259 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8260 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8261 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8262 } 8263 8264 /* free memory */ 8265 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8266 PetscFunctionReturn(0); 8267 } 8268 8269 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8270 { 8271 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8272 PC_IS* pcis = (PC_IS*)pc->data; 8273 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8274 IS subset,subset_mult,subset_n; 8275 PetscInt local_size,coarse_size=0; 8276 PetscInt *local_primal_indices=NULL; 8277 const PetscInt *t_local_primal_indices; 8278 PetscErrorCode ierr; 8279 8280 PetscFunctionBegin; 8281 /* Compute global number of coarse dofs */ 8282 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8283 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8284 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8285 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8286 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8287 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8288 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8289 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8290 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8291 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8292 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8293 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8294 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8295 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8296 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8297 8298 /* check numbering */ 8299 if (pcbddc->dbg_flag) { 8300 PetscScalar coarsesum,*array,*array2; 8301 PetscInt i; 8302 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8303 8304 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8305 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8306 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8307 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8308 /* counter */ 8309 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8310 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8311 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8312 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8313 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8314 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8315 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8316 for (i=0;i<pcbddc->local_primal_size;i++) { 8317 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8318 } 8319 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8320 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8321 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8322 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8323 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8324 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8325 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8326 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8327 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8328 for (i=0;i<pcis->n;i++) { 8329 if (array[i] != 0.0 && array[i] != array2[i]) { 8330 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8331 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8332 set_error = PETSC_TRUE; 8333 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8334 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8335 } 8336 } 8337 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8338 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8339 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8340 for (i=0;i<pcis->n;i++) { 8341 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8342 } 8343 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8344 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8345 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8346 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8347 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8348 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8349 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8350 PetscInt *gidxs; 8351 8352 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8353 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8354 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8355 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8356 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8357 for (i=0;i<pcbddc->local_primal_size;i++) { 8358 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8359 } 8360 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8361 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8362 } 8363 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8364 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8365 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8366 } 8367 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8368 /* get back data */ 8369 *coarse_size_n = coarse_size; 8370 *local_primal_indices_n = local_primal_indices; 8371 PetscFunctionReturn(0); 8372 } 8373 8374 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8375 { 8376 IS localis_t; 8377 PetscInt i,lsize,*idxs,n; 8378 PetscScalar *vals; 8379 PetscErrorCode ierr; 8380 8381 PetscFunctionBegin; 8382 /* get indices in local ordering exploiting local to global map */ 8383 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8384 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8385 for (i=0;i<lsize;i++) vals[i] = 1.0; 8386 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8387 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8388 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8389 if (idxs) { /* multilevel guard */ 8390 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8391 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8392 } 8393 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8394 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8395 ierr = PetscFree(vals);CHKERRQ(ierr); 8396 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8397 /* now compute set in local ordering */ 8398 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8399 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8400 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8401 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8402 for (i=0,lsize=0;i<n;i++) { 8403 if (PetscRealPart(vals[i]) > 0.5) { 8404 lsize++; 8405 } 8406 } 8407 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8408 for (i=0,lsize=0;i<n;i++) { 8409 if (PetscRealPart(vals[i]) > 0.5) { 8410 idxs[lsize++] = i; 8411 } 8412 } 8413 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8414 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8415 *localis = localis_t; 8416 PetscFunctionReturn(0); 8417 } 8418 8419 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8420 { 8421 PC_IS *pcis=(PC_IS*)pc->data; 8422 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8423 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8424 Mat S_j; 8425 PetscInt *used_xadj,*used_adjncy; 8426 PetscBool free_used_adj; 8427 PetscErrorCode ierr; 8428 8429 PetscFunctionBegin; 8430 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8431 free_used_adj = PETSC_FALSE; 8432 if (pcbddc->sub_schurs_layers == -1) { 8433 used_xadj = NULL; 8434 used_adjncy = NULL; 8435 } else { 8436 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8437 used_xadj = pcbddc->mat_graph->xadj; 8438 used_adjncy = pcbddc->mat_graph->adjncy; 8439 } else if (pcbddc->computed_rowadj) { 8440 used_xadj = pcbddc->mat_graph->xadj; 8441 used_adjncy = pcbddc->mat_graph->adjncy; 8442 } else { 8443 PetscBool flg_row=PETSC_FALSE; 8444 const PetscInt *xadj,*adjncy; 8445 PetscInt nvtxs; 8446 8447 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8448 if (flg_row) { 8449 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8450 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8451 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8452 free_used_adj = PETSC_TRUE; 8453 } else { 8454 pcbddc->sub_schurs_layers = -1; 8455 used_xadj = NULL; 8456 used_adjncy = NULL; 8457 } 8458 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8459 } 8460 } 8461 8462 /* setup sub_schurs data */ 8463 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8464 if (!sub_schurs->schur_explicit) { 8465 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8466 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8467 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8468 } else { 8469 Mat change = NULL; 8470 Vec scaling = NULL; 8471 IS change_primal = NULL, iP; 8472 PetscInt benign_n; 8473 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8474 PetscBool isseqaij,need_change = PETSC_FALSE; 8475 PetscBool discrete_harmonic = PETSC_FALSE; 8476 8477 if (!pcbddc->use_vertices && reuse_solvers) { 8478 PetscInt n_vertices; 8479 8480 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8481 reuse_solvers = (PetscBool)!n_vertices; 8482 } 8483 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8484 if (!isseqaij) { 8485 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8486 if (matis->A == pcbddc->local_mat) { 8487 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8488 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8489 } else { 8490 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8491 } 8492 } 8493 if (!pcbddc->benign_change_explicit) { 8494 benign_n = pcbddc->benign_n; 8495 } else { 8496 benign_n = 0; 8497 } 8498 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8499 We need a global reduction to avoid possible deadlocks. 8500 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8501 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8502 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8503 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8504 need_change = (PetscBool)(!need_change); 8505 } 8506 /* If the user defines additional constraints, we import them here. 8507 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8508 if (need_change) { 8509 PC_IS *pcisf; 8510 PC_BDDC *pcbddcf; 8511 PC pcf; 8512 8513 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8514 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8515 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8516 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8517 8518 /* hacks */ 8519 pcisf = (PC_IS*)pcf->data; 8520 pcisf->is_B_local = pcis->is_B_local; 8521 pcisf->vec1_N = pcis->vec1_N; 8522 pcisf->BtoNmap = pcis->BtoNmap; 8523 pcisf->n = pcis->n; 8524 pcisf->n_B = pcis->n_B; 8525 pcbddcf = (PC_BDDC*)pcf->data; 8526 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8527 pcbddcf->mat_graph = pcbddc->mat_graph; 8528 pcbddcf->use_faces = PETSC_TRUE; 8529 pcbddcf->use_change_of_basis = PETSC_TRUE; 8530 pcbddcf->use_change_on_faces = PETSC_TRUE; 8531 pcbddcf->use_qr_single = PETSC_TRUE; 8532 pcbddcf->fake_change = PETSC_TRUE; 8533 8534 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8535 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8536 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8537 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8538 change = pcbddcf->ConstraintMatrix; 8539 pcbddcf->ConstraintMatrix = NULL; 8540 8541 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8542 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8543 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8544 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8545 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8546 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8547 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8548 pcf->ops->destroy = NULL; 8549 pcf->ops->reset = NULL; 8550 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8551 } 8552 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8553 8554 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8555 if (iP) { 8556 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8557 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8558 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8559 } 8560 if (discrete_harmonic) { 8561 Mat A; 8562 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8563 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8564 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8565 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8566 ierr = MatDestroy(&A);CHKERRQ(ierr); 8567 } else { 8568 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8569 } 8570 ierr = MatDestroy(&change);CHKERRQ(ierr); 8571 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8572 } 8573 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8574 8575 /* free adjacency */ 8576 if (free_used_adj) { 8577 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8578 } 8579 PetscFunctionReturn(0); 8580 } 8581 8582 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8583 { 8584 PC_IS *pcis=(PC_IS*)pc->data; 8585 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8586 PCBDDCGraph graph; 8587 PetscErrorCode ierr; 8588 8589 PetscFunctionBegin; 8590 /* attach interface graph for determining subsets */ 8591 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8592 IS verticesIS,verticescomm; 8593 PetscInt vsize,*idxs; 8594 8595 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8596 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8597 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8598 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8599 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8600 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8601 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8602 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8603 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8604 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8605 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8606 } else { 8607 graph = pcbddc->mat_graph; 8608 } 8609 /* print some info */ 8610 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8611 IS vertices; 8612 PetscInt nv,nedges,nfaces; 8613 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8614 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8615 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8616 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8618 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8619 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8621 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8622 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8623 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8624 } 8625 8626 /* sub_schurs init */ 8627 if (!pcbddc->sub_schurs) { 8628 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8629 } 8630 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8631 8632 /* free graph struct */ 8633 if (pcbddc->sub_schurs_rebuild) { 8634 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8635 } 8636 PetscFunctionReturn(0); 8637 } 8638 8639 PetscErrorCode PCBDDCCheckOperator(PC pc) 8640 { 8641 PC_IS *pcis=(PC_IS*)pc->data; 8642 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8643 PetscErrorCode ierr; 8644 8645 PetscFunctionBegin; 8646 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8647 IS zerodiag = NULL; 8648 Mat S_j,B0_B=NULL; 8649 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8650 PetscScalar *p0_check,*array,*array2; 8651 PetscReal norm; 8652 PetscInt i; 8653 8654 /* B0 and B0_B */ 8655 if (zerodiag) { 8656 IS dummy; 8657 8658 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8659 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8660 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8661 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8662 } 8663 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8664 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8665 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8666 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8667 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8668 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8669 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8670 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8671 /* S_j */ 8672 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8673 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8674 8675 /* mimic vector in \widetilde{W}_\Gamma */ 8676 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8677 /* continuous in primal space */ 8678 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8679 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8680 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8681 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8682 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8683 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8684 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8685 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8686 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8687 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8688 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8689 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8690 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8691 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8692 8693 /* assemble rhs for coarse problem */ 8694 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8695 /* local with Schur */ 8696 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8697 if (zerodiag) { 8698 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8699 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8700 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8701 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8702 } 8703 /* sum on primal nodes the local contributions */ 8704 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8705 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8706 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8707 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8708 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8709 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8710 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8711 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8712 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8713 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8714 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8715 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8716 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8717 /* scale primal nodes (BDDC sums contibutions) */ 8718 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8719 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8720 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8721 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8722 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8723 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8724 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8725 /* global: \widetilde{B0}_B w_\Gamma */ 8726 if (zerodiag) { 8727 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8728 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8729 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8730 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8731 } 8732 /* BDDC */ 8733 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8734 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8735 8736 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8737 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8738 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8739 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8740 for (i=0;i<pcbddc->benign_n;i++) { 8741 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8742 } 8743 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8744 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8745 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8746 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8747 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8748 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8749 } 8750 PetscFunctionReturn(0); 8751 } 8752 8753 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8754 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8755 { 8756 Mat At; 8757 IS rows; 8758 PetscInt rst,ren; 8759 PetscErrorCode ierr; 8760 PetscLayout rmap; 8761 8762 PetscFunctionBegin; 8763 rst = ren = 0; 8764 if (ccomm != MPI_COMM_NULL) { 8765 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8766 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8767 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8768 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8769 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8770 } 8771 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8772 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8773 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8774 8775 if (ccomm != MPI_COMM_NULL) { 8776 Mat_MPIAIJ *a,*b; 8777 IS from,to; 8778 Vec gvec; 8779 PetscInt lsize; 8780 8781 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8782 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8783 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8784 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8785 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8786 a = (Mat_MPIAIJ*)At->data; 8787 b = (Mat_MPIAIJ*)(*B)->data; 8788 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8789 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8790 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8791 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8792 b->A = a->A; 8793 b->B = a->B; 8794 8795 b->donotstash = a->donotstash; 8796 b->roworiented = a->roworiented; 8797 b->rowindices = 0; 8798 b->rowvalues = 0; 8799 b->getrowactive = PETSC_FALSE; 8800 8801 (*B)->rmap = rmap; 8802 (*B)->factortype = A->factortype; 8803 (*B)->assembled = PETSC_TRUE; 8804 (*B)->insertmode = NOT_SET_VALUES; 8805 (*B)->preallocated = PETSC_TRUE; 8806 8807 if (a->colmap) { 8808 #if defined(PETSC_USE_CTABLE) 8809 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8810 #else 8811 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8812 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8813 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8814 #endif 8815 } else b->colmap = 0; 8816 if (a->garray) { 8817 PetscInt len; 8818 len = a->B->cmap->n; 8819 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8820 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8821 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8822 } else b->garray = 0; 8823 8824 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8825 b->lvec = a->lvec; 8826 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8827 8828 /* cannot use VecScatterCopy */ 8829 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8830 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8831 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8832 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8833 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8834 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8835 ierr = ISDestroy(&from);CHKERRQ(ierr); 8836 ierr = ISDestroy(&to);CHKERRQ(ierr); 8837 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8838 } 8839 ierr = MatDestroy(&At);CHKERRQ(ierr); 8840 PetscFunctionReturn(0); 8841 } 8842