1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 if (!maxneighs) { 1528 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1529 *nnsp = NULL; 1530 PetscFunctionReturn(0); 1531 } 1532 maxsize = 0; 1533 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1534 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1535 /* create vectors to hold quadrature weights */ 1536 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1537 if (!transpose) { 1538 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1541 } 1542 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1543 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1544 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1545 for (i=0;i<maxneighs;i++) { 1546 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1547 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1548 } 1549 1550 /* compute local quad vec */ 1551 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1556 } 1557 ierr = VecSet(p,1.);CHKERRQ(ierr); 1558 if (!transpose) { 1559 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1562 } 1563 if (vl2l) { 1564 Mat lA; 1565 VecScatter sc; 1566 1567 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1568 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1569 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1573 } else { 1574 vins = v; 1575 } 1576 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1577 ierr = VecDestroy(&p);CHKERRQ(ierr); 1578 1579 /* insert in global quadrature vecs */ 1580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1581 for (i=0;i<n_neigh;i++) { 1582 const PetscInt *idxs; 1583 PetscInt idx,nn,j; 1584 1585 idxs = shared[i]; 1586 nn = n_shared[i]; 1587 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1588 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1589 idx = -(idx+1); 1590 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1594 if (vl2l) { 1595 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1596 } 1597 ierr = VecDestroy(&v);CHKERRQ(ierr); 1598 ierr = PetscFree(vals);CHKERRQ(ierr); 1599 1600 /* assemble near null space */ 1601 for (i=0;i<maxneighs;i++) { 1602 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<maxneighs;i++) { 1605 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1606 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1607 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1614 { 1615 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 if (primalv) { 1620 if (pcbddc->user_primal_vertices_local) { 1621 IS list[2], newp; 1622 1623 list[0] = primalv; 1624 list[1] = pcbddc->user_primal_vertices_local; 1625 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1626 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1627 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1628 pcbddc->user_primal_vertices_local = newp; 1629 } else { 1630 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1631 } 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 1636 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1637 { 1638 PetscInt f, *comp = (PetscInt *)ctx; 1639 1640 PetscFunctionBegin; 1641 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1646 { 1647 PetscErrorCode ierr; 1648 Vec local,global; 1649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1651 PetscBool monolithic = PETSC_FALSE; 1652 1653 PetscFunctionBegin; 1654 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1655 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1657 /* need to convert from global to local topology information and remove references to information in global ordering */ 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1660 if (monolithic) { /* just get block size to properly compute vertices */ 1661 if (pcbddc->vertex_size == 1) { 1662 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1663 } 1664 goto boundary; 1665 } 1666 1667 if (pcbddc->user_provided_isfordofs) { 1668 if (pcbddc->n_ISForDofs) { 1669 PetscInt i; 1670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1672 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1674 } 1675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1676 pcbddc->n_ISForDofs = 0; 1677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1678 } 1679 } else { 1680 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1681 DM dm; 1682 1683 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1684 if (!dm) { 1685 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1686 } 1687 if (dm) { 1688 IS *fields; 1689 PetscInt nf,i; 1690 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1691 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1692 for (i=0;i<nf;i++) { 1693 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1694 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1695 } 1696 ierr = PetscFree(fields);CHKERRQ(ierr); 1697 pcbddc->n_ISForDofsLocal = nf; 1698 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1699 PetscContainer c; 1700 1701 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1702 if (c) { 1703 MatISLocalFields lf; 1704 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1705 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1706 } else { /* fallback, create the default fields if bs > 1 */ 1707 PetscInt i, n = matis->A->rmap->n; 1708 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1709 if (i > 1) { 1710 pcbddc->n_ISForDofsLocal = i; 1711 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1713 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 } 1718 } else { 1719 PetscInt i; 1720 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1721 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 } 1725 1726 boundary: 1727 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->DirichletBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1734 } else if (pcbddc->NeumannBoundariesLocal) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1736 } 1737 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1738 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1739 } 1740 ierr = VecDestroy(&global);CHKERRQ(ierr); 1741 ierr = VecDestroy(&local);CHKERRQ(ierr); 1742 /* detect local disconnected subdomains if requested (use matis->A) */ 1743 if (pcbddc->detect_disconnected) { 1744 IS primalv = NULL; 1745 PetscInt i; 1746 1747 for (i=0;i<pcbddc->n_local_subs;i++) { 1748 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1751 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1752 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1753 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1754 } 1755 /* early stage corner detection */ 1756 { 1757 DM dm; 1758 1759 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1760 if (dm) { 1761 PetscBool isda; 1762 1763 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1764 if (isda) { 1765 ISLocalToGlobalMapping l2l; 1766 IS corners; 1767 Mat lA; 1768 1769 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1770 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1771 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1772 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1773 if (l2l) { 1774 const PetscInt *idx; 1775 PetscInt bs,*idxout,n; 1776 1777 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1778 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1779 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1780 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1783 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1785 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1786 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1787 pcbddc->corner_selected = PETSC_TRUE; 1788 } else { /* not from DMDA */ 1789 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1795 DM dm; 1796 1797 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1798 if (!dm) { 1799 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1800 } 1801 if (dm) { 1802 Vec vcoords; 1803 PetscSection section; 1804 PetscReal *coords; 1805 PetscInt d,cdim,nl,nf,**ctxs; 1806 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1807 1808 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1809 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1810 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1811 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1812 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1813 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1814 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1816 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1817 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1818 for (d=0;d<cdim;d++) { 1819 PetscInt i; 1820 const PetscScalar *v; 1821 1822 for (i=0;i<nf;i++) ctxs[i][0] = d; 1823 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1824 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1825 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1826 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1827 } 1828 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1829 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1830 ierr = PetscFree(coords);CHKERRQ(ierr); 1831 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1832 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1833 } 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1839 { 1840 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1841 PetscErrorCode ierr; 1842 IS nis; 1843 const PetscInt *idxs; 1844 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1845 PetscBool *ld; 1846 1847 PetscFunctionBegin; 1848 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1849 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1850 if (mop == MPI_LAND) { 1851 /* init rootdata with true */ 1852 ld = (PetscBool*) matis->sf_rootdata; 1853 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1854 } else { 1855 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1856 } 1857 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1858 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1859 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1860 ld = (PetscBool*) matis->sf_leafdata; 1861 for (i=0;i<nd;i++) 1862 if (-1 < idxs[i] && idxs[i] < n) 1863 ld[idxs[i]] = PETSC_TRUE; 1864 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1865 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1866 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1867 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1868 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1869 if (mop == MPI_LAND) { 1870 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1871 } else { 1872 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1873 } 1874 for (i=0,nnd=0;i<n;i++) 1875 if (ld[i]) 1876 nidxs[nnd++] = i; 1877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1878 ierr = ISDestroy(is);CHKERRQ(ierr); 1879 *is = nis; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1884 { 1885 PC_IS *pcis = (PC_IS*)(pc->data); 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!pcbddc->benign_have_null) { 1891 PetscFunctionReturn(0); 1892 } 1893 if (pcbddc->ChangeOfBasisMatrix) { 1894 Vec swap; 1895 1896 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1897 swap = pcbddc->work_change; 1898 pcbddc->work_change = r; 1899 r = swap; 1900 } 1901 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1903 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = VecSet(z,0.);CHKERRQ(ierr); 1905 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1906 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 pcbddc->work_change = r; 1909 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1910 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1916 { 1917 PCBDDCBenignMatMult_ctx ctx; 1918 PetscErrorCode ierr; 1919 PetscBool apply_right,apply_left,reset_x; 1920 1921 PetscFunctionBegin; 1922 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1923 if (transpose) { 1924 apply_right = ctx->apply_left; 1925 apply_left = ctx->apply_right; 1926 } else { 1927 apply_right = ctx->apply_right; 1928 apply_left = ctx->apply_left; 1929 } 1930 reset_x = PETSC_FALSE; 1931 if (apply_right) { 1932 const PetscScalar *ax; 1933 PetscInt nl,i; 1934 1935 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1936 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1938 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1939 for (i=0;i<ctx->benign_n;i++) { 1940 PetscScalar sum,val; 1941 const PetscInt *idxs; 1942 PetscInt nz,j; 1943 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1945 sum = 0.; 1946 if (ctx->apply_p0) { 1947 val = ctx->work[idxs[nz-1]]; 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 ctx->work[idxs[j]] += val; 1951 } 1952 } else { 1953 for (j=0;j<nz-1;j++) { 1954 sum += ctx->work[idxs[j]]; 1955 } 1956 } 1957 ctx->work[idxs[nz-1]] -= sum; 1958 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 } 1960 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1961 reset_x = PETSC_TRUE; 1962 } 1963 if (transpose) { 1964 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1967 } 1968 if (reset_x) { 1969 ierr = VecResetArray(x);CHKERRQ(ierr); 1970 } 1971 if (apply_left) { 1972 PetscScalar *ay; 1973 PetscInt i; 1974 1975 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1976 for (i=0;i<ctx->benign_n;i++) { 1977 PetscScalar sum,val; 1978 const PetscInt *idxs; 1979 PetscInt nz,j; 1980 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 val = -ay[idxs[nz-1]]; 1983 if (ctx->apply_p0) { 1984 sum = 0.; 1985 for (j=0;j<nz-1;j++) { 1986 sum += ay[idxs[j]]; 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] += sum; 1990 } else { 1991 for (j=0;j<nz-1;j++) { 1992 ay[idxs[j]] += val; 1993 } 1994 ay[idxs[nz-1]] = 0.; 1995 } 1996 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2022 { 2023 PC_IS *pcis = (PC_IS*)pc->data; 2024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2025 PCBDDCBenignMatMult_ctx ctx; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (!restore) { 2030 Mat A_IB,A_BI; 2031 PetscScalar *work; 2032 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2033 2034 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2035 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2036 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2037 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2038 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2039 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2040 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2041 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2042 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2043 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2044 ctx->apply_left = PETSC_TRUE; 2045 ctx->apply_right = PETSC_FALSE; 2046 ctx->apply_p0 = PETSC_FALSE; 2047 ctx->benign_n = pcbddc->benign_n; 2048 if (reuse) { 2049 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2050 ctx->free = PETSC_FALSE; 2051 } else { /* TODO: could be optimized for successive solves */ 2052 ISLocalToGlobalMapping N_to_D; 2053 PetscInt i; 2054 2055 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2056 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 for (i=0;i<pcbddc->benign_n;i++) { 2058 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2059 } 2060 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2061 ctx->free = PETSC_TRUE; 2062 } 2063 ctx->A = pcis->A_IB; 2064 ctx->work = work; 2065 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2066 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 pcis->A_IB = A_IB; 2069 2070 /* A_BI as A_IB^T */ 2071 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2072 pcbddc->benign_original_mat = pcis->A_BI; 2073 pcis->A_BI = A_BI; 2074 } else { 2075 if (!pcbddc->benign_original_mat) { 2076 PetscFunctionReturn(0); 2077 } 2078 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2080 pcis->A_IB = ctx->A; 2081 ctx->A = NULL; 2082 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2083 pcis->A_BI = pcbddc->benign_original_mat; 2084 pcbddc->benign_original_mat = NULL; 2085 if (ctx->free) { 2086 PetscInt i; 2087 for (i=0;i<ctx->benign_n;i++) { 2088 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2093 ierr = PetscFree(ctx);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /* used just in bddc debug mode */ 2099 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2100 { 2101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2102 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2103 Mat An; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2108 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2109 if (is1) { 2110 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2111 ierr = MatDestroy(&An);CHKERRQ(ierr); 2112 } else { 2113 *B = An; 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* TODO: add reuse flag */ 2119 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2120 { 2121 Mat Bt; 2122 PetscScalar *a,*bdata; 2123 const PetscInt *ii,*ij; 2124 PetscInt m,n,i,nnz,*bii,*bij; 2125 PetscBool flg_row; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2130 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2131 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2132 nnz = n; 2133 for (i=0;i<ii[n];i++) { 2134 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2135 } 2136 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2138 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2139 nnz = 0; 2140 bii[0] = 0; 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=ii[i];j<ii[i+1];j++) { 2144 PetscScalar entry = a[j]; 2145 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2146 bij[nnz] = ij[j]; 2147 bdata[nnz] = entry; 2148 nnz++; 2149 } 2150 } 2151 bii[i+1] = nnz; 2152 } 2153 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2154 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2155 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2156 { 2157 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2158 b->free_a = PETSC_TRUE; 2159 b->free_ij = PETSC_TRUE; 2160 } 2161 if (*B == A) { 2162 ierr = MatDestroy(&A);CHKERRQ(ierr); 2163 } 2164 *B = Bt; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2169 { 2170 Mat B = NULL; 2171 DM dm; 2172 IS is_dummy,*cc_n; 2173 ISLocalToGlobalMapping l2gmap_dummy; 2174 PCBDDCGraph graph; 2175 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2176 PetscInt i,n; 2177 PetscInt *xadj,*adjncy; 2178 PetscBool isplex = PETSC_FALSE; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (ncc) *ncc = 0; 2183 if (cc) *cc = NULL; 2184 if (primalv) *primalv = NULL; 2185 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2186 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2187 if (!dm) { 2188 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2189 } 2190 if (dm) { 2191 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2192 } 2193 if (isplex) { /* this code has been modified from plexpartition.c */ 2194 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2195 PetscInt *adj = NULL; 2196 IS cellNumbering; 2197 const PetscInt *cellNum; 2198 PetscBool useCone, useClosure; 2199 PetscSection section; 2200 PetscSegBuffer adjBuffer; 2201 PetscSF sfPoint; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2206 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2207 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2208 /* Build adjacency graph via a section/segbuffer */ 2209 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2210 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2211 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2212 /* Always use FVM adjacency to create partitioner graph */ 2213 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2214 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2215 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2217 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2218 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2219 for (n = 0, p = pStart; p < pEnd; p++) { 2220 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2221 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2222 adjSize = PETSC_DETERMINE; 2223 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2224 for (a = 0; a < adjSize; ++a) { 2225 const PetscInt point = adj[a]; 2226 if (pStart <= point && point < pEnd) { 2227 PetscInt *PETSC_RESTRICT pBuf; 2228 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2229 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2230 *pBuf = point; 2231 } 2232 } 2233 n++; 2234 } 2235 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2236 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2237 /* Derive CSR graph from section/segbuffer */ 2238 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2239 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2240 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2241 for (idx = 0, p = pStart; p < pEnd; p++) { 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2244 } 2245 xadj[n] = size; 2246 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2247 /* Clean up */ 2248 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2249 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2250 ierr = PetscFree(adj);CHKERRQ(ierr); 2251 graph->xadj = xadj; 2252 graph->adjncy = adjncy; 2253 } else { 2254 Mat A; 2255 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2256 2257 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2258 if (!A->rmap->N || !A->cmap->N) { 2259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij && filter) { 2264 PetscBool isseqdense; 2265 2266 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2267 if (!isseqdense) { 2268 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2269 } else { /* TODO: rectangular case and LDA */ 2270 PetscScalar *array; 2271 PetscReal chop=1.e-6; 2272 2273 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2274 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2275 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2276 for (i=0;i<n;i++) { 2277 PetscInt j; 2278 for (j=i+1;j<n;j++) { 2279 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2280 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2281 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2282 } 2283 } 2284 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2286 } 2287 } else { 2288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2289 B = A; 2290 } 2291 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2292 2293 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2294 if (filter) { 2295 PetscScalar *data; 2296 PetscInt j,cum; 2297 2298 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2299 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2300 cum = 0; 2301 for (i=0;i<n;i++) { 2302 PetscInt t; 2303 2304 for (j=xadj[i];j<xadj[i+1];j++) { 2305 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2306 continue; 2307 } 2308 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2309 } 2310 t = xadj_filtered[i]; 2311 xadj_filtered[i] = cum; 2312 cum += t; 2313 } 2314 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2315 graph->xadj = xadj_filtered; 2316 graph->adjncy = adjncy_filtered; 2317 } else { 2318 graph->xadj = xadj; 2319 graph->adjncy = adjncy; 2320 } 2321 } 2322 /* compute local connected components using PCBDDCGraph */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2326 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2327 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2328 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2329 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2330 2331 /* partial clean up */ 2332 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2333 if (B) { 2334 PetscBool flg_row; 2335 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2336 ierr = MatDestroy(&B);CHKERRQ(ierr); 2337 } 2338 if (isplex) { 2339 ierr = PetscFree(xadj);CHKERRQ(ierr); 2340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2341 } 2342 2343 /* get back data */ 2344 if (isplex) { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc || primalv) { 2347 Mat A; 2348 PetscBT btv,btvt; 2349 PetscSection subSection; 2350 PetscInt *ids,cum,cump,*cids,*pids; 2351 2352 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2353 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2354 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2355 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2356 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2357 2358 cids[0] = 0; 2359 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2360 PetscInt j; 2361 2362 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2363 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2364 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2365 2366 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2367 for (k = 0; k < 2*size; k += 2) { 2368 PetscInt s, p = closure[k], off, dof, cdof; 2369 2370 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2371 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2372 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2373 for (s = 0; s < dof-cdof; s++) { 2374 if (PetscBTLookupSet(btvt,off+s)) continue; 2375 if (!PetscBTLookup(btv,off+s)) { 2376 ids[cum++] = off+s; 2377 } else { /* cross-vertex */ 2378 pids[cump++] = off+s; 2379 } 2380 } 2381 } 2382 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2383 } 2384 cids[i+1] = cum; 2385 /* mark dofs as already assigned */ 2386 for (j = cids[i]; j < cids[i+1]; j++) { 2387 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2388 } 2389 } 2390 if (cc) { 2391 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2392 for (i = 0; i < graph->ncc; i++) { 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2394 } 2395 *cc = cc_n; 2396 } 2397 if (primalv) { 2398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2401 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2402 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2403 } 2404 } else { 2405 if (ncc) *ncc = graph->ncc; 2406 if (cc) { 2407 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2408 for (i=0;i<graph->ncc;i++) { 2409 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2410 } 2411 *cc = cc_n; 2412 } 2413 } 2414 /* clean up graph */ 2415 graph->xadj = 0; 2416 graph->adjncy = 0; 2417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2422 { 2423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2424 PC_IS* pcis = (PC_IS*)(pc->data); 2425 IS dirIS = NULL; 2426 PetscInt i; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2431 if (zerodiag) { 2432 Mat A; 2433 Vec vec3_N; 2434 PetscScalar *vals; 2435 const PetscInt *idxs; 2436 PetscInt nz,*count; 2437 2438 /* p0 */ 2439 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2441 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2442 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2443 for (i=0;i<nz;i++) vals[i] = 1.; 2444 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2447 /* v_I */ 2448 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) vals[i] = 0.; 2450 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2454 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2456 if (dirIS) { 2457 PetscInt n; 2458 2459 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2460 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) vals[i] = 0.; 2462 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2463 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2464 } 2465 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2467 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2468 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2469 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2470 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2471 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2472 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2473 ierr = PetscFree(vals);CHKERRQ(ierr); 2474 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2475 2476 /* there should not be any pressure dofs lying on the interface */ 2477 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2478 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2480 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2482 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = PetscFree(count);CHKERRQ(ierr); 2485 } 2486 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2487 2488 /* check PCBDDCBenignGetOrSetP0 */ 2489 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2490 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2491 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2493 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2496 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2505 PetscInt nz,n; 2506 PetscInt *interior_dofs,n_interior_dofs,nneu; 2507 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2512 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2513 for (n=0;n<pcbddc->benign_n;n++) { 2514 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2517 pcbddc->benign_n = 0; 2518 2519 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2520 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2521 Checks if all the pressure dofs in each subdomain have a zero diagonal 2522 If not, a change of basis on pressures is not needed 2523 since the local Schur complements are already SPD 2524 */ 2525 has_null_pressures = PETSC_TRUE; 2526 have_null = PETSC_TRUE; 2527 if (pcbddc->n_ISForDofsLocal) { 2528 IS iP = NULL; 2529 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2530 2531 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2532 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2535 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2540 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2541 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2542 if (iP) { 2543 IS newpressures; 2544 2545 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2547 pressures = newpressures; 2548 } 2549 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2550 if (!sorted) { 2551 ierr = ISSort(pressures);CHKERRQ(ierr); 2552 } 2553 } else { 2554 pressures = NULL; 2555 } 2556 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2557 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2558 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2559 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2560 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2561 if (!sorted) { 2562 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2563 } 2564 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2565 zerodiag_save = zerodiag; 2566 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2567 if (!nz) { 2568 if (n) have_null = PETSC_FALSE; 2569 has_null_pressures = PETSC_FALSE; 2570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2571 } 2572 recompute_zerodiag = PETSC_FALSE; 2573 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2574 zerodiag_subs = NULL; 2575 pcbddc->benign_n = 0; 2576 n_interior_dofs = 0; 2577 interior_dofs = NULL; 2578 nneu = 0; 2579 if (pcbddc->NeumannBoundariesLocal) { 2580 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2581 } 2582 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2583 if (checkb) { /* need to compute interior nodes */ 2584 PetscInt n,i,j; 2585 PetscInt n_neigh,*neigh,*n_shared,**shared; 2586 PetscInt *iwork; 2587 2588 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2589 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2592 for (i=1;i<n_neigh;i++) 2593 for (j=0;j<n_shared[i];j++) 2594 iwork[shared[i][j]] += 1; 2595 for (i=0;i<n;i++) 2596 if (!iwork[i]) 2597 interior_dofs[n_interior_dofs++] = i; 2598 ierr = PetscFree(iwork);CHKERRQ(ierr); 2599 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2600 } 2601 if (has_null_pressures) { 2602 IS *subs; 2603 PetscInt nsubs,i,j,nl; 2604 const PetscInt *idxs; 2605 PetscScalar *array; 2606 Vec *work; 2607 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2608 2609 subs = pcbddc->local_subs; 2610 nsubs = pcbddc->n_local_subs; 2611 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2612 if (checkb) { 2613 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2615 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2616 /* work[0] = 1_p */ 2617 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2620 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2621 /* work[0] = 1_v */ 2622 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2623 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2624 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2625 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2627 } 2628 if (nsubs > 1) { 2629 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2630 for (i=0;i<nsubs;i++) { 2631 ISLocalToGlobalMapping l2g; 2632 IS t_zerodiag_subs; 2633 PetscInt nl; 2634 2635 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2637 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2638 if (nl) { 2639 PetscBool valid = PETSC_TRUE; 2640 2641 if (checkb) { 2642 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2644 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2645 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2647 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2649 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2650 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2651 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2652 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2653 for (j=0;j<n_interior_dofs;j++) { 2654 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2655 valid = PETSC_FALSE; 2656 break; 2657 } 2658 } 2659 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2660 } 2661 if (valid && nneu) { 2662 const PetscInt *idxs; 2663 PetscInt nzb; 2664 2665 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2667 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2668 if (nzb) valid = PETSC_FALSE; 2669 } 2670 if (valid && pressures) { 2671 IS t_pressure_subs; 2672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2673 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2674 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2675 } 2676 if (valid) { 2677 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2678 pcbddc->benign_n++; 2679 } else { 2680 recompute_zerodiag = PETSC_TRUE; 2681 } 2682 } 2683 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2685 } 2686 } else { /* there's just one subdomain (or zero if they have not been detected */ 2687 PetscBool valid = PETSC_TRUE; 2688 2689 if (nneu) valid = PETSC_FALSE; 2690 if (valid && pressures) { 2691 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2692 } 2693 if (valid && checkb) { 2694 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2695 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2696 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2697 for (j=0;j<n_interior_dofs;j++) { 2698 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2699 valid = PETSC_FALSE; 2700 break; 2701 } 2702 } 2703 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2704 } 2705 if (valid) { 2706 pcbddc->benign_n = 1; 2707 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2708 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2709 zerodiag_subs[0] = zerodiag; 2710 } 2711 } 2712 if (checkb) { 2713 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2714 } 2715 } 2716 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2717 2718 if (!pcbddc->benign_n) { 2719 PetscInt n; 2720 2721 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2722 recompute_zerodiag = PETSC_FALSE; 2723 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2724 if (n) { 2725 has_null_pressures = PETSC_FALSE; 2726 have_null = PETSC_FALSE; 2727 } 2728 } 2729 2730 /* final check for null pressures */ 2731 if (zerodiag && pressures) { 2732 PetscInt nz,np; 2733 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2734 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2735 if (nz != np) have_null = PETSC_FALSE; 2736 } 2737 2738 if (recompute_zerodiag) { 2739 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2740 if (pcbddc->benign_n == 1) { 2741 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2742 zerodiag = zerodiag_subs[0]; 2743 } else { 2744 PetscInt i,nzn,*new_idxs; 2745 2746 nzn = 0; 2747 for (i=0;i<pcbddc->benign_n;i++) { 2748 PetscInt ns; 2749 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2750 nzn += ns; 2751 } 2752 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2753 nzn = 0; 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscInt ns,*idxs; 2756 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2757 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2760 nzn += ns; 2761 } 2762 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2764 } 2765 have_null = PETSC_FALSE; 2766 } 2767 2768 /* Prepare matrix to compute no-net-flux */ 2769 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2770 Mat A,loc_divudotp; 2771 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2772 IS row,col,isused = NULL; 2773 PetscInt M,N,n,st,n_isused; 2774 2775 if (pressures) { 2776 isused = pressures; 2777 } else { 2778 isused = zerodiag_save; 2779 } 2780 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2781 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2782 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2783 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2784 n_isused = 0; 2785 if (isused) { 2786 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2787 } 2788 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 st = st-n_isused; 2790 if (n) { 2791 const PetscInt *gidxs; 2792 2793 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2794 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2795 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2803 } 2804 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2805 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2808 ierr = ISDestroy(&row);CHKERRQ(ierr); 2809 ierr = ISDestroy(&col);CHKERRQ(ierr); 2810 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2813 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2817 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2818 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 } 2821 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2822 2823 /* change of basis and p0 dofs */ 2824 if (has_null_pressures) { 2825 IS zerodiagc; 2826 const PetscInt *idxs,*idxsc; 2827 PetscInt i,s,*nnz; 2828 2829 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2830 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2831 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2832 /* local change of basis for pressures */ 2833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2834 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2836 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2838 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2839 for (i=0;i<pcbddc->benign_n;i++) { 2840 PetscInt nzs,j; 2841 2842 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2843 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2844 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2845 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2846 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2847 } 2848 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2849 ierr = PetscFree(nnz);CHKERRQ(ierr); 2850 /* set identity on velocities */ 2851 for (i=0;i<n-nz;i++) { 2852 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2853 } 2854 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2855 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2856 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2857 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2858 /* set change on pressures */ 2859 for (s=0;s<pcbddc->benign_n;s++) { 2860 PetscScalar *array; 2861 PetscInt nzs; 2862 2863 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2864 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2865 for (i=0;i<nzs-1;i++) { 2866 PetscScalar vals[2]; 2867 PetscInt cols[2]; 2868 2869 cols[0] = idxs[i]; 2870 cols[1] = idxs[nzs-1]; 2871 vals[0] = 1.; 2872 vals[1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2876 for (i=0;i<nzs-1;i++) array[i] = -1.; 2877 array[nzs-1] = 1.; 2878 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2879 /* store local idxs for p0 */ 2880 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2881 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(array);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 /* project if needed */ 2887 if (pcbddc->benign_change_explicit) { 2888 Mat M; 2889 2890 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2891 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2893 ierr = MatDestroy(&M);CHKERRQ(ierr); 2894 } 2895 /* store global idxs for p0 */ 2896 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2897 } 2898 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2899 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2900 2901 /* determines if the coarse solver will be singular or not */ 2902 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2903 /* determines if the problem has subdomains with 0 pressure block */ 2904 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2905 *zerodiaglocal = zerodiag; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2910 { 2911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2912 PetscScalar *array; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 if (!pcbddc->benign_sf) { 2917 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2918 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2919 } 2920 if (get) { 2921 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2922 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2923 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2924 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2925 } else { 2926 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2927 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2928 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2935 { 2936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 /* TODO: add error checking 2941 - avoid nested pop (or push) calls. 2942 - cannot push before pop. 2943 - cannot call this if pcbddc->local_mat is NULL 2944 */ 2945 if (!pcbddc->benign_n) { 2946 PetscFunctionReturn(0); 2947 } 2948 if (pop) { 2949 if (pcbddc->benign_change_explicit) { 2950 IS is_p0; 2951 MatReuse reuse; 2952 2953 /* extract B_0 */ 2954 reuse = MAT_INITIAL_MATRIX; 2955 if (pcbddc->benign_B0) { 2956 reuse = MAT_REUSE_MATRIX; 2957 } 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2959 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2960 /* remove rows and cols from local problem */ 2961 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2962 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2963 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2964 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2965 } else { 2966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2967 PetscScalar *vals; 2968 PetscInt i,n,*idxs_ins; 2969 2970 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2971 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2972 if (!pcbddc->benign_B0) { 2973 PetscInt *nnz; 2974 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2975 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2976 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2977 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2978 for (i=0;i<pcbddc->benign_n;i++) { 2979 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2980 nnz[i] = n - nnz[i]; 2981 } 2982 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2983 ierr = PetscFree(nnz);CHKERRQ(ierr); 2984 } 2985 2986 for (i=0;i<pcbddc->benign_n;i++) { 2987 PetscScalar *array; 2988 PetscInt *idxs,j,nz,cum; 2989 2990 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2991 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2992 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2993 for (j=0;j<nz;j++) vals[j] = 1.; 2994 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2996 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2997 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2998 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2999 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3000 cum = 0; 3001 for (j=0;j<n;j++) { 3002 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3003 vals[cum] = array[j]; 3004 idxs_ins[cum] = j; 3005 cum++; 3006 } 3007 } 3008 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3010 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3011 } 3012 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3015 } 3016 } else { /* push */ 3017 if (pcbddc->benign_change_explicit) { 3018 PetscInt i; 3019 3020 for (i=0;i<pcbddc->benign_n;i++) { 3021 PetscScalar *B0_vals; 3022 PetscInt *B0_cols,B0_ncol; 3023 3024 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3025 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3026 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3027 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 } else { 3033 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3034 } 3035 } 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3040 { 3041 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3042 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3043 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3044 PetscBLASInt *B_iwork,*B_ifail; 3045 PetscScalar *work,lwork; 3046 PetscScalar *St,*S,*eigv; 3047 PetscScalar *Sarray,*Starray; 3048 PetscReal *eigs,thresh,lthresh,uthresh; 3049 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3050 PetscBool allocated_S_St; 3051 #if defined(PETSC_USE_COMPLEX) 3052 PetscReal *rwork; 3053 #endif 3054 PetscErrorCode ierr; 3055 3056 PetscFunctionBegin; 3057 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3058 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3059 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3060 3061 if (pcbddc->dbg_flag) { 3062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3065 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3066 } 3067 3068 if (pcbddc->dbg_flag) { 3069 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3070 } 3071 3072 /* max size of subsets */ 3073 mss = 0; 3074 for (i=0;i<sub_schurs->n_subs;i++) { 3075 PetscInt subset_size; 3076 3077 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3078 mss = PetscMax(mss,subset_size); 3079 } 3080 3081 /* min/max and threshold */ 3082 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3083 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3084 nmax = PetscMax(nmin,nmax); 3085 allocated_S_St = PETSC_FALSE; 3086 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3087 allocated_S_St = PETSC_TRUE; 3088 } 3089 3090 /* allocate lapack workspace */ 3091 cum = cum2 = 0; 3092 maxneigs = 0; 3093 for (i=0;i<sub_schurs->n_subs;i++) { 3094 PetscInt n,subset_size; 3095 3096 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3097 n = PetscMin(subset_size,nmax); 3098 cum += subset_size; 3099 cum2 += subset_size*n; 3100 maxneigs = PetscMax(maxneigs,n); 3101 } 3102 if (mss) { 3103 if (sub_schurs->is_symmetric) { 3104 PetscBLASInt B_itype = 1; 3105 PetscBLASInt B_N = mss; 3106 PetscReal zero = 0.0; 3107 PetscReal eps = 0.0; /* dlamch? */ 3108 3109 B_lwork = -1; 3110 S = NULL; 3111 St = NULL; 3112 eigs = NULL; 3113 eigv = NULL; 3114 B_iwork = NULL; 3115 B_ifail = NULL; 3116 #if defined(PETSC_USE_COMPLEX) 3117 rwork = NULL; 3118 #endif 3119 thresh = 1.0; 3120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3121 #if defined(PETSC_USE_COMPLEX) 3122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3123 #else 3124 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3125 #endif 3126 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3129 } else { 3130 lwork = 0; 3131 } 3132 3133 nv = 0; 3134 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3135 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3136 } 3137 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3138 if (allocated_S_St) { 3139 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3140 } 3141 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3142 #if defined(PETSC_USE_COMPLEX) 3143 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3144 #endif 3145 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3146 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3147 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3148 nv+cum,&pcbddc->adaptive_constraints_idxs, 3149 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3150 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3151 3152 maxneigs = 0; 3153 cum = cumarray = 0; 3154 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3155 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3156 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3157 const PetscInt *idxs; 3158 3159 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3160 for (cum=0;cum<nv;cum++) { 3161 pcbddc->adaptive_constraints_n[cum] = 1; 3162 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3163 pcbddc->adaptive_constraints_data[cum] = 1.0; 3164 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3165 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3166 } 3167 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3168 } 3169 3170 if (mss) { /* multilevel */ 3171 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3172 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3173 } 3174 3175 lthresh = pcbddc->adaptive_threshold[0]; 3176 uthresh = pcbddc->adaptive_threshold[1]; 3177 for (i=0;i<sub_schurs->n_subs;i++) { 3178 const PetscInt *idxs; 3179 PetscReal upper,lower; 3180 PetscInt j,subset_size,eigs_start = 0; 3181 PetscBLASInt B_N; 3182 PetscBool same_data = PETSC_FALSE; 3183 PetscBool scal = PETSC_FALSE; 3184 3185 if (pcbddc->use_deluxe_scaling) { 3186 upper = PETSC_MAX_REAL; 3187 lower = uthresh; 3188 } else { 3189 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3190 upper = 1./uthresh; 3191 lower = 0.; 3192 } 3193 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3194 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3195 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3196 /* this is experimental: we assume the dofs have been properly grouped to have 3197 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3198 if (!sub_schurs->is_posdef) { 3199 Mat T; 3200 3201 for (j=0;j<subset_size;j++) { 3202 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3204 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3205 ierr = MatDestroy(&T);CHKERRQ(ierr); 3206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3207 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3208 ierr = MatDestroy(&T);CHKERRQ(ierr); 3209 if (sub_schurs->change_primal_sub) { 3210 PetscInt nz,k; 3211 const PetscInt *idxs; 3212 3213 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3214 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3215 for (k=0;k<nz;k++) { 3216 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3217 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3218 } 3219 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3220 } 3221 scal = PETSC_TRUE; 3222 break; 3223 } 3224 } 3225 } 3226 3227 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3228 if (sub_schurs->is_symmetric) { 3229 PetscInt j,k; 3230 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3231 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3232 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3233 } 3234 for (j=0;j<subset_size;j++) { 3235 for (k=j;k<subset_size;k++) { 3236 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3237 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3238 } 3239 } 3240 } else { 3241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3243 } 3244 } else { 3245 S = Sarray + cumarray; 3246 St = Starray + cumarray; 3247 } 3248 /* see if we can save some work */ 3249 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3250 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3251 } 3252 3253 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3254 B_neigs = 0; 3255 } else { 3256 if (sub_schurs->is_symmetric) { 3257 PetscBLASInt B_itype = 1; 3258 PetscBLASInt B_IL, B_IU; 3259 PetscReal eps = -1.0; /* dlamch? */ 3260 PetscInt nmin_s; 3261 PetscBool compute_range; 3262 3263 compute_range = (PetscBool)!same_data; 3264 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3265 3266 if (pcbddc->dbg_flag) { 3267 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range);CHKERRQ(ierr); 3268 } 3269 3270 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3271 if (compute_range) { 3272 3273 /* ask for eigenvalues larger than thresh */ 3274 if (sub_schurs->is_posdef) { 3275 #if defined(PETSC_USE_COMPLEX) 3276 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3277 #else 3278 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3279 #endif 3280 } else { /* no theory so far, but it works nicely */ 3281 PetscInt recipe = 0; 3282 PetscReal bb[2]; 3283 3284 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3285 switch (recipe) { 3286 case 0: 3287 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3288 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3289 #if defined(PETSC_USE_COMPLEX) 3290 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3291 #else 3292 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3293 #endif 3294 break; 3295 case 1: 3296 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3297 #if defined(PETSC_USE_COMPLEX) 3298 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3299 #else 3300 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3301 #endif 3302 if (!scal) { 3303 PetscBLASInt B_neigs2; 3304 3305 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3306 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3307 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3308 #if defined(PETSC_USE_COMPLEX) 3309 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3310 #else 3311 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3312 #endif 3313 B_neigs += B_neigs2; 3314 } 3315 break; 3316 default: 3317 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3318 break; 3319 } 3320 } 3321 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3322 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3323 B_IL = 1; 3324 #if defined(PETSC_USE_COMPLEX) 3325 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3326 #else 3327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3328 #endif 3329 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3330 PetscInt k; 3331 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3332 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3333 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3334 nmin = nmax; 3335 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3336 for (k=0;k<nmax;k++) { 3337 eigs[k] = 1./PETSC_SMALL; 3338 eigv[k*(subset_size+1)] = 1.0; 3339 } 3340 } 3341 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3342 if (B_ierr) { 3343 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3344 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3345 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3346 } 3347 3348 if (B_neigs > nmax) { 3349 if (pcbddc->dbg_flag) { 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3351 } 3352 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3353 B_neigs = nmax; 3354 } 3355 3356 nmin_s = PetscMin(nmin,B_N); 3357 if (B_neigs < nmin_s) { 3358 PetscBLASInt B_neigs2; 3359 3360 if (pcbddc->use_deluxe_scaling) { 3361 if (scal) { 3362 B_IU = nmin_s; 3363 B_IL = B_neigs + 1; 3364 } else { 3365 B_IL = B_N - nmin_s + 1; 3366 B_IU = B_N - B_neigs; 3367 } 3368 } else { 3369 B_IL = B_neigs + 1; 3370 B_IU = nmin_s; 3371 } 3372 if (pcbddc->dbg_flag) { 3373 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3374 } 3375 if (sub_schurs->is_symmetric) { 3376 PetscInt j,k; 3377 for (j=0;j<subset_size;j++) { 3378 for (k=j;k<subset_size;k++) { 3379 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3380 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3381 } 3382 } 3383 } else { 3384 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3385 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3386 } 3387 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3388 #if defined(PETSC_USE_COMPLEX) 3389 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3390 #else 3391 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3392 #endif 3393 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3394 B_neigs += B_neigs2; 3395 } 3396 if (B_ierr) { 3397 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3398 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3399 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3400 } 3401 if (pcbddc->dbg_flag) { 3402 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3403 for (j=0;j<B_neigs;j++) { 3404 if (eigs[j] == 0.0) { 3405 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3406 } else { 3407 if (pcbddc->use_deluxe_scaling) { 3408 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3409 } else { 3410 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3411 } 3412 } 3413 } 3414 } 3415 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3416 } 3417 /* change the basis back to the original one */ 3418 if (sub_schurs->change) { 3419 Mat change,phi,phit; 3420 3421 if (pcbddc->dbg_flag > 2) { 3422 PetscInt ii; 3423 for (ii=0;ii<B_neigs;ii++) { 3424 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3425 for (j=0;j<B_N;j++) { 3426 #if defined(PETSC_USE_COMPLEX) 3427 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3428 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3429 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3430 #else 3431 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3432 #endif 3433 } 3434 } 3435 } 3436 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3437 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3438 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3439 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3440 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3441 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3442 } 3443 maxneigs = PetscMax(B_neigs,maxneigs); 3444 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3445 if (B_neigs) { 3446 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3447 3448 if (pcbddc->dbg_flag > 1) { 3449 PetscInt ii; 3450 for (ii=0;ii<B_neigs;ii++) { 3451 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3452 for (j=0;j<B_N;j++) { 3453 #if defined(PETSC_USE_COMPLEX) 3454 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3455 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3456 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3457 #else 3458 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3459 #endif 3460 } 3461 } 3462 } 3463 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3464 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3465 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3466 cum++; 3467 } 3468 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3469 /* shift for next computation */ 3470 cumarray += subset_size*subset_size; 3471 } 3472 if (pcbddc->dbg_flag) { 3473 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3474 } 3475 3476 if (mss) { 3477 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3478 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3479 /* destroy matrices (junk) */ 3480 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3481 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3482 } 3483 if (allocated_S_St) { 3484 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3485 } 3486 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3487 #if defined(PETSC_USE_COMPLEX) 3488 ierr = PetscFree(rwork);CHKERRQ(ierr); 3489 #endif 3490 if (pcbddc->dbg_flag) { 3491 PetscInt maxneigs_r; 3492 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3493 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3494 } 3495 PetscFunctionReturn(0); 3496 } 3497 3498 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3499 { 3500 PetscScalar *coarse_submat_vals; 3501 PetscErrorCode ierr; 3502 3503 PetscFunctionBegin; 3504 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3505 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3506 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3507 3508 /* Setup local neumann solver ksp_R */ 3509 /* PCBDDCSetUpLocalScatters should be called first! */ 3510 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3511 3512 /* 3513 Setup local correction and local part of coarse basis. 3514 Gives back the dense local part of the coarse matrix in column major ordering 3515 */ 3516 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3517 3518 /* Compute total number of coarse nodes and setup coarse solver */ 3519 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3520 3521 /* free */ 3522 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3523 PetscFunctionReturn(0); 3524 } 3525 3526 PetscErrorCode PCBDDCResetCustomization(PC pc) 3527 { 3528 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3529 PetscErrorCode ierr; 3530 3531 PetscFunctionBegin; 3532 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3533 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3534 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3535 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3536 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3537 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3538 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3539 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3540 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3541 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3542 PetscFunctionReturn(0); 3543 } 3544 3545 PetscErrorCode PCBDDCResetTopography(PC pc) 3546 { 3547 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3548 PetscInt i; 3549 PetscErrorCode ierr; 3550 3551 PetscFunctionBegin; 3552 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3553 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3554 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3555 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3556 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3557 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3558 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3559 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3560 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3561 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3562 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3563 for (i=0;i<pcbddc->n_local_subs;i++) { 3564 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3565 } 3566 pcbddc->n_local_subs = 0; 3567 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3568 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3569 pcbddc->graphanalyzed = PETSC_FALSE; 3570 pcbddc->recompute_topography = PETSC_TRUE; 3571 pcbddc->corner_selected = PETSC_FALSE; 3572 PetscFunctionReturn(0); 3573 } 3574 3575 PetscErrorCode PCBDDCResetSolvers(PC pc) 3576 { 3577 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3578 PetscErrorCode ierr; 3579 3580 PetscFunctionBegin; 3581 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3582 if (pcbddc->coarse_phi_B) { 3583 PetscScalar *array; 3584 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3585 ierr = PetscFree(array);CHKERRQ(ierr); 3586 } 3587 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3588 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3589 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3590 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3591 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3592 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3593 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3594 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3595 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3596 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3597 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3598 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3599 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3600 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3601 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3602 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3603 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3604 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3605 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3606 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3607 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3608 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3609 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3610 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3611 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3612 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3613 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3614 if (pcbddc->benign_zerodiag_subs) { 3615 PetscInt i; 3616 for (i=0;i<pcbddc->benign_n;i++) { 3617 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3618 } 3619 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3620 } 3621 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3622 PetscFunctionReturn(0); 3623 } 3624 3625 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3626 { 3627 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3628 PC_IS *pcis = (PC_IS*)pc->data; 3629 VecType impVecType; 3630 PetscInt n_constraints,n_R,old_size; 3631 PetscErrorCode ierr; 3632 3633 PetscFunctionBegin; 3634 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3635 n_R = pcis->n - pcbddc->n_vertices; 3636 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3637 /* local work vectors (try to avoid unneeded work)*/ 3638 /* R nodes */ 3639 old_size = -1; 3640 if (pcbddc->vec1_R) { 3641 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3642 } 3643 if (n_R != old_size) { 3644 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3645 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3646 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3647 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3648 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3649 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3650 } 3651 /* local primal dofs */ 3652 old_size = -1; 3653 if (pcbddc->vec1_P) { 3654 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3655 } 3656 if (pcbddc->local_primal_size != old_size) { 3657 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3658 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3659 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3660 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3661 } 3662 /* local explicit constraints */ 3663 old_size = -1; 3664 if (pcbddc->vec1_C) { 3665 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3666 } 3667 if (n_constraints && n_constraints != old_size) { 3668 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3669 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3670 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3671 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3672 } 3673 PetscFunctionReturn(0); 3674 } 3675 3676 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3677 { 3678 PetscErrorCode ierr; 3679 /* pointers to pcis and pcbddc */ 3680 PC_IS* pcis = (PC_IS*)pc->data; 3681 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3682 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3683 /* submatrices of local problem */ 3684 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3685 /* submatrices of local coarse problem */ 3686 Mat S_VV,S_CV,S_VC,S_CC; 3687 /* working matrices */ 3688 Mat C_CR; 3689 /* additional working stuff */ 3690 PC pc_R; 3691 Mat F,Brhs = NULL; 3692 Vec dummy_vec; 3693 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3694 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3695 PetscScalar *work; 3696 PetscInt *idx_V_B; 3697 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3698 PetscInt i,n_R,n_D,n_B; 3699 3700 /* some shortcuts to scalars */ 3701 PetscScalar one=1.0,m_one=-1.0; 3702 3703 PetscFunctionBegin; 3704 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3705 3706 /* Set Non-overlapping dimensions */ 3707 n_vertices = pcbddc->n_vertices; 3708 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3709 n_B = pcis->n_B; 3710 n_D = pcis->n - n_B; 3711 n_R = pcis->n - n_vertices; 3712 3713 /* vertices in boundary numbering */ 3714 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3715 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3716 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3717 3718 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3719 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3720 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3721 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3722 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3723 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3725 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3726 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3727 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3728 3729 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3730 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3731 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3732 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3733 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3734 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3735 lda_rhs = n_R; 3736 need_benign_correction = PETSC_FALSE; 3737 if (isLU || isILU || isCHOL) { 3738 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3739 } else if (sub_schurs && sub_schurs->reuse_solver) { 3740 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3741 MatFactorType type; 3742 3743 F = reuse_solver->F; 3744 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3745 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3746 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3747 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3748 } else { 3749 F = NULL; 3750 } 3751 3752 /* determine if we can use a sparse right-hand side */ 3753 sparserhs = PETSC_FALSE; 3754 if (F) { 3755 MatSolverType solver; 3756 3757 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3758 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3759 } 3760 3761 /* allocate workspace */ 3762 n = 0; 3763 if (n_constraints) { 3764 n += lda_rhs*n_constraints; 3765 } 3766 if (n_vertices) { 3767 n = PetscMax(2*lda_rhs*n_vertices,n); 3768 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3769 } 3770 if (!pcbddc->symmetric_primal) { 3771 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3772 } 3773 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3774 3775 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3776 dummy_vec = NULL; 3777 if (need_benign_correction && lda_rhs != n_R && F) { 3778 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3779 } 3780 3781 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3782 if (n_constraints) { 3783 Mat M3,C_B; 3784 IS is_aux; 3785 PetscScalar *array,*array2; 3786 3787 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3788 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3789 3790 /* Extract constraints on R nodes: C_{CR} */ 3791 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3792 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3793 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3794 3795 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3796 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3797 if (!sparserhs) { 3798 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3799 for (i=0;i<n_constraints;i++) { 3800 const PetscScalar *row_cmat_values; 3801 const PetscInt *row_cmat_indices; 3802 PetscInt size_of_constraint,j; 3803 3804 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3805 for (j=0;j<size_of_constraint;j++) { 3806 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3807 } 3808 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3809 } 3810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3811 } else { 3812 Mat tC_CR; 3813 3814 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3815 if (lda_rhs != n_R) { 3816 PetscScalar *aa; 3817 PetscInt r,*ii,*jj; 3818 PetscBool done; 3819 3820 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3821 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3822 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3823 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3824 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3825 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3826 } else { 3827 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3828 tC_CR = C_CR; 3829 } 3830 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3831 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3832 } 3833 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3834 if (F) { 3835 if (need_benign_correction) { 3836 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3837 3838 /* rhs is already zero on interior dofs, no need to change the rhs */ 3839 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3840 } 3841 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3842 if (need_benign_correction) { 3843 PetscScalar *marr; 3844 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3845 3846 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3847 if (lda_rhs != n_R) { 3848 for (i=0;i<n_constraints;i++) { 3849 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3850 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3851 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3852 } 3853 } else { 3854 for (i=0;i<n_constraints;i++) { 3855 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3856 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3857 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3858 } 3859 } 3860 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3861 } 3862 } else { 3863 PetscScalar *marr; 3864 3865 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3866 for (i=0;i<n_constraints;i++) { 3867 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3868 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3869 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3870 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3871 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3872 } 3873 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3874 } 3875 if (sparserhs) { 3876 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3877 } 3878 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3879 if (!pcbddc->switch_static) { 3880 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3881 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3882 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3883 for (i=0;i<n_constraints;i++) { 3884 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3885 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3886 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3887 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3888 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3889 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3890 } 3891 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3892 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3893 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3894 } else { 3895 if (lda_rhs != n_R) { 3896 IS dummy; 3897 3898 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3899 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3900 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3901 } else { 3902 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3903 pcbddc->local_auxmat2 = local_auxmat2_R; 3904 } 3905 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3906 } 3907 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3908 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3909 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3910 if (isCHOL) { 3911 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3912 } else { 3913 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3914 } 3915 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3916 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3917 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3918 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3919 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3920 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3921 } 3922 3923 /* Get submatrices from subdomain matrix */ 3924 if (n_vertices) { 3925 IS is_aux; 3926 PetscBool isseqaij; 3927 3928 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3929 IS tis; 3930 3931 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3932 ierr = ISSort(tis);CHKERRQ(ierr); 3933 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3934 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3935 } else { 3936 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3937 } 3938 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3939 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3940 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3941 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3942 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3943 } 3944 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3945 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3946 } 3947 3948 /* Matrix of coarse basis functions (local) */ 3949 if (pcbddc->coarse_phi_B) { 3950 PetscInt on_B,on_primal,on_D=n_D; 3951 if (pcbddc->coarse_phi_D) { 3952 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3953 } 3954 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3955 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3956 PetscScalar *marray; 3957 3958 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3959 ierr = PetscFree(marray);CHKERRQ(ierr); 3960 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3961 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3962 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3963 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3964 } 3965 } 3966 3967 if (!pcbddc->coarse_phi_B) { 3968 PetscScalar *marr; 3969 3970 /* memory size */ 3971 n = n_B*pcbddc->local_primal_size; 3972 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3973 if (!pcbddc->symmetric_primal) n *= 2; 3974 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3975 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3976 marr += n_B*pcbddc->local_primal_size; 3977 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3978 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3979 marr += n_D*pcbddc->local_primal_size; 3980 } 3981 if (!pcbddc->symmetric_primal) { 3982 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3983 marr += n_B*pcbddc->local_primal_size; 3984 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3985 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3986 } 3987 } else { 3988 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3989 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3990 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3991 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3992 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3993 } 3994 } 3995 } 3996 3997 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3998 p0_lidx_I = NULL; 3999 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4000 const PetscInt *idxs; 4001 4002 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4003 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4004 for (i=0;i<pcbddc->benign_n;i++) { 4005 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4006 } 4007 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4008 } 4009 4010 /* vertices */ 4011 if (n_vertices) { 4012 PetscBool restoreavr = PETSC_FALSE; 4013 4014 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4015 4016 if (n_R) { 4017 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4018 PetscBLASInt B_N,B_one = 1; 4019 PetscScalar *x,*y; 4020 4021 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4022 if (need_benign_correction) { 4023 ISLocalToGlobalMapping RtoN; 4024 IS is_p0; 4025 PetscInt *idxs_p0,n; 4026 4027 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4028 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4029 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4030 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4031 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4032 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4033 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4034 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4035 } 4036 4037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4038 if (!sparserhs || need_benign_correction) { 4039 if (lda_rhs == n_R) { 4040 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4041 } else { 4042 PetscScalar *av,*array; 4043 const PetscInt *xadj,*adjncy; 4044 PetscInt n; 4045 PetscBool flg_row; 4046 4047 array = work+lda_rhs*n_vertices; 4048 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4049 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4050 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4051 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4052 for (i=0;i<n;i++) { 4053 PetscInt j; 4054 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4055 } 4056 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4057 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4058 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4059 } 4060 if (need_benign_correction) { 4061 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4062 PetscScalar *marr; 4063 4064 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4065 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4066 4067 | 0 0 0 | (V) 4068 L = | 0 0 -1 | (P-p0) 4069 | 0 0 -1 | (p0) 4070 4071 */ 4072 for (i=0;i<reuse_solver->benign_n;i++) { 4073 const PetscScalar *vals; 4074 const PetscInt *idxs,*idxs_zero; 4075 PetscInt n,j,nz; 4076 4077 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4078 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4079 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4080 for (j=0;j<n;j++) { 4081 PetscScalar val = vals[j]; 4082 PetscInt k,col = idxs[j]; 4083 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4084 } 4085 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4086 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4087 } 4088 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4089 } 4090 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4091 Brhs = A_RV; 4092 } else { 4093 Mat tA_RVT,A_RVT; 4094 4095 if (!pcbddc->symmetric_primal) { 4096 /* A_RV already scaled by -1 */ 4097 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4098 } else { 4099 restoreavr = PETSC_TRUE; 4100 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4101 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4102 A_RVT = A_VR; 4103 } 4104 if (lda_rhs != n_R) { 4105 PetscScalar *aa; 4106 PetscInt r,*ii,*jj; 4107 PetscBool done; 4108 4109 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4110 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4111 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4112 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4113 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4114 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4115 } else { 4116 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4117 tA_RVT = A_RVT; 4118 } 4119 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4120 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4121 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4122 } 4123 if (F) { 4124 /* need to correct the rhs */ 4125 if (need_benign_correction) { 4126 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4127 PetscScalar *marr; 4128 4129 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4130 if (lda_rhs != n_R) { 4131 for (i=0;i<n_vertices;i++) { 4132 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4133 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4134 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4135 } 4136 } else { 4137 for (i=0;i<n_vertices;i++) { 4138 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4139 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4140 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4141 } 4142 } 4143 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4144 } 4145 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4146 if (restoreavr) { 4147 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4148 } 4149 /* need to correct the solution */ 4150 if (need_benign_correction) { 4151 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4152 PetscScalar *marr; 4153 4154 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4155 if (lda_rhs != n_R) { 4156 for (i=0;i<n_vertices;i++) { 4157 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4158 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4159 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4160 } 4161 } else { 4162 for (i=0;i<n_vertices;i++) { 4163 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4164 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4165 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4166 } 4167 } 4168 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4169 } 4170 } else { 4171 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4172 for (i=0;i<n_vertices;i++) { 4173 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4174 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4175 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4176 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4177 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4178 } 4179 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4180 } 4181 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4182 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4183 /* S_VV and S_CV */ 4184 if (n_constraints) { 4185 Mat B; 4186 4187 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4188 for (i=0;i<n_vertices;i++) { 4189 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4190 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4191 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4192 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4193 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4194 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4195 } 4196 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4197 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4198 ierr = MatDestroy(&B);CHKERRQ(ierr); 4199 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4200 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4201 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4202 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4203 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4204 ierr = MatDestroy(&B);CHKERRQ(ierr); 4205 } 4206 if (lda_rhs != n_R) { 4207 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4208 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4209 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4210 } 4211 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4212 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4213 if (need_benign_correction) { 4214 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4215 PetscScalar *marr,*sums; 4216 4217 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4218 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4219 for (i=0;i<reuse_solver->benign_n;i++) { 4220 const PetscScalar *vals; 4221 const PetscInt *idxs,*idxs_zero; 4222 PetscInt n,j,nz; 4223 4224 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4225 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4226 for (j=0;j<n_vertices;j++) { 4227 PetscInt k; 4228 sums[j] = 0.; 4229 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4230 } 4231 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4232 for (j=0;j<n;j++) { 4233 PetscScalar val = vals[j]; 4234 PetscInt k; 4235 for (k=0;k<n_vertices;k++) { 4236 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4237 } 4238 } 4239 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4240 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4241 } 4242 ierr = PetscFree(sums);CHKERRQ(ierr); 4243 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4244 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4245 } 4246 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4247 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4248 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4249 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4250 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4251 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4252 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4253 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4254 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4255 } else { 4256 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4257 } 4258 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4259 4260 /* coarse basis functions */ 4261 for (i=0;i<n_vertices;i++) { 4262 PetscScalar *y; 4263 4264 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4265 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4266 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4267 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4268 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4269 y[n_B*i+idx_V_B[i]] = 1.0; 4270 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4271 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4272 4273 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4274 PetscInt j; 4275 4276 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4277 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4278 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4279 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4280 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4281 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4282 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4283 } 4284 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4285 } 4286 /* if n_R == 0 the object is not destroyed */ 4287 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4288 } 4289 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4290 4291 if (n_constraints) { 4292 Mat B; 4293 4294 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4295 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4296 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4297 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4298 if (n_vertices) { 4299 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4300 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4301 } else { 4302 Mat S_VCt; 4303 4304 if (lda_rhs != n_R) { 4305 ierr = MatDestroy(&B);CHKERRQ(ierr); 4306 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4307 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4308 } 4309 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4310 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4311 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4312 } 4313 } 4314 ierr = MatDestroy(&B);CHKERRQ(ierr); 4315 /* coarse basis functions */ 4316 for (i=0;i<n_constraints;i++) { 4317 PetscScalar *y; 4318 4319 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4320 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4321 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4322 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4323 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4324 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4325 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4326 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4327 PetscInt j; 4328 4329 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4330 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4331 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4332 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4333 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4334 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4335 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4336 } 4337 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4338 } 4339 } 4340 if (n_constraints) { 4341 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4342 } 4343 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4344 4345 /* coarse matrix entries relative to B_0 */ 4346 if (pcbddc->benign_n) { 4347 Mat B0_B,B0_BPHI; 4348 IS is_dummy; 4349 PetscScalar *data; 4350 PetscInt j; 4351 4352 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4353 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4354 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4355 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4356 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4357 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4358 for (j=0;j<pcbddc->benign_n;j++) { 4359 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4360 for (i=0;i<pcbddc->local_primal_size;i++) { 4361 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4362 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4363 } 4364 } 4365 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4366 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4367 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4368 } 4369 4370 /* compute other basis functions for non-symmetric problems */ 4371 if (!pcbddc->symmetric_primal) { 4372 Mat B_V=NULL,B_C=NULL; 4373 PetscScalar *marray; 4374 4375 if (n_constraints) { 4376 Mat S_CCT,C_CRT; 4377 4378 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4379 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4380 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4381 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4382 if (n_vertices) { 4383 Mat S_VCT; 4384 4385 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4386 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4387 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4388 } 4389 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4390 } else { 4391 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4392 } 4393 if (n_vertices && n_R) { 4394 PetscScalar *av,*marray; 4395 const PetscInt *xadj,*adjncy; 4396 PetscInt n; 4397 PetscBool flg_row; 4398 4399 /* B_V = B_V - A_VR^T */ 4400 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4401 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4402 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4403 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4404 for (i=0;i<n;i++) { 4405 PetscInt j; 4406 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4407 } 4408 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4409 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4410 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4411 } 4412 4413 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4414 if (n_vertices) { 4415 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4416 for (i=0;i<n_vertices;i++) { 4417 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4418 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4419 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4420 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4421 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4422 } 4423 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4424 } 4425 if (B_C) { 4426 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4427 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4428 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4429 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4430 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4431 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4432 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4433 } 4434 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4435 } 4436 /* coarse basis functions */ 4437 for (i=0;i<pcbddc->local_primal_size;i++) { 4438 PetscScalar *y; 4439 4440 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4441 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4442 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4443 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4444 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4445 if (i<n_vertices) { 4446 y[n_B*i+idx_V_B[i]] = 1.0; 4447 } 4448 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4449 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4450 4451 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4452 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4453 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4454 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4455 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4456 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4457 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4458 } 4459 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4460 } 4461 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4462 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4463 } 4464 4465 /* free memory */ 4466 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4467 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4468 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4469 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4470 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4471 ierr = PetscFree(work);CHKERRQ(ierr); 4472 if (n_vertices) { 4473 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4474 } 4475 if (n_constraints) { 4476 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4477 } 4478 /* Checking coarse_sub_mat and coarse basis functios */ 4479 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4480 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4481 if (pcbddc->dbg_flag) { 4482 Mat coarse_sub_mat; 4483 Mat AUXMAT,TM1,TM2,TM3,TM4; 4484 Mat coarse_phi_D,coarse_phi_B; 4485 Mat coarse_psi_D,coarse_psi_B; 4486 Mat A_II,A_BB,A_IB,A_BI; 4487 Mat C_B,CPHI; 4488 IS is_dummy; 4489 Vec mones; 4490 MatType checkmattype=MATSEQAIJ; 4491 PetscReal real_value; 4492 4493 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4494 Mat A; 4495 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4496 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4497 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4498 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4499 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4500 ierr = MatDestroy(&A);CHKERRQ(ierr); 4501 } else { 4502 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4503 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4504 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4505 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4506 } 4507 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4508 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4509 if (!pcbddc->symmetric_primal) { 4510 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4511 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4512 } 4513 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4514 4515 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4516 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4517 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4518 if (!pcbddc->symmetric_primal) { 4519 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4520 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4521 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4522 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4523 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4524 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4525 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4526 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4527 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4528 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4529 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4530 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4531 } else { 4532 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4533 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4534 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4535 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4536 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4537 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4538 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4539 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4540 } 4541 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4542 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4543 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4544 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4545 if (pcbddc->benign_n) { 4546 Mat B0_B,B0_BPHI; 4547 PetscScalar *data,*data2; 4548 PetscInt j; 4549 4550 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4551 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4552 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4553 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4554 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4555 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4556 for (j=0;j<pcbddc->benign_n;j++) { 4557 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4558 for (i=0;i<pcbddc->local_primal_size;i++) { 4559 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4560 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4561 } 4562 } 4563 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4564 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4565 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4566 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4567 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4568 } 4569 #if 0 4570 { 4571 PetscViewer viewer; 4572 char filename[256]; 4573 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4574 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4575 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4576 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4577 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4578 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4579 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4580 if (pcbddc->coarse_phi_B) { 4581 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4582 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4583 } 4584 if (pcbddc->coarse_phi_D) { 4585 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4586 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4587 } 4588 if (pcbddc->coarse_psi_B) { 4589 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4590 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4591 } 4592 if (pcbddc->coarse_psi_D) { 4593 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4594 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4595 } 4596 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4597 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4598 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4599 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4600 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4601 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4602 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4603 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4604 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4605 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4606 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4607 } 4608 #endif 4609 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4610 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4611 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4612 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4613 4614 /* check constraints */ 4615 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4616 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4617 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4618 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4619 } else { 4620 PetscScalar *data; 4621 Mat tmat; 4622 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4623 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4624 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4625 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4626 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4627 } 4628 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4629 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4630 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4631 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4632 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4633 if (!pcbddc->symmetric_primal) { 4634 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4635 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4636 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4637 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4639 } 4640 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4641 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4642 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4643 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4645 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4646 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4647 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4648 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4649 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4650 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4651 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4652 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4653 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4654 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4655 if (!pcbddc->symmetric_primal) { 4656 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4657 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4658 } 4659 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4660 } 4661 /* get back data */ 4662 *coarse_submat_vals_n = coarse_submat_vals; 4663 PetscFunctionReturn(0); 4664 } 4665 4666 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4667 { 4668 Mat *work_mat; 4669 IS isrow_s,iscol_s; 4670 PetscBool rsorted,csorted; 4671 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4672 PetscErrorCode ierr; 4673 4674 PetscFunctionBegin; 4675 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4676 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4677 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4678 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4679 4680 if (!rsorted) { 4681 const PetscInt *idxs; 4682 PetscInt *idxs_sorted,i; 4683 4684 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4685 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4686 for (i=0;i<rsize;i++) { 4687 idxs_perm_r[i] = i; 4688 } 4689 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4690 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4691 for (i=0;i<rsize;i++) { 4692 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4693 } 4694 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4695 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4696 } else { 4697 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4698 isrow_s = isrow; 4699 } 4700 4701 if (!csorted) { 4702 if (isrow == iscol) { 4703 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4704 iscol_s = isrow_s; 4705 } else { 4706 const PetscInt *idxs; 4707 PetscInt *idxs_sorted,i; 4708 4709 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4710 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4711 for (i=0;i<csize;i++) { 4712 idxs_perm_c[i] = i; 4713 } 4714 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4715 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4716 for (i=0;i<csize;i++) { 4717 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4718 } 4719 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4720 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4721 } 4722 } else { 4723 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4724 iscol_s = iscol; 4725 } 4726 4727 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4728 4729 if (!rsorted || !csorted) { 4730 Mat new_mat; 4731 IS is_perm_r,is_perm_c; 4732 4733 if (!rsorted) { 4734 PetscInt *idxs_r,i; 4735 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4736 for (i=0;i<rsize;i++) { 4737 idxs_r[idxs_perm_r[i]] = i; 4738 } 4739 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4740 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4741 } else { 4742 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4743 } 4744 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4745 4746 if (!csorted) { 4747 if (isrow_s == iscol_s) { 4748 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4749 is_perm_c = is_perm_r; 4750 } else { 4751 PetscInt *idxs_c,i; 4752 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4753 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4754 for (i=0;i<csize;i++) { 4755 idxs_c[idxs_perm_c[i]] = i; 4756 } 4757 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4758 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4759 } 4760 } else { 4761 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4762 } 4763 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4764 4765 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4766 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4767 work_mat[0] = new_mat; 4768 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4769 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4770 } 4771 4772 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4773 *B = work_mat[0]; 4774 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4775 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4776 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4777 PetscFunctionReturn(0); 4778 } 4779 4780 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4781 { 4782 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4783 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4784 Mat new_mat,lA; 4785 IS is_local,is_global; 4786 PetscInt local_size; 4787 PetscBool isseqaij; 4788 PetscErrorCode ierr; 4789 4790 PetscFunctionBegin; 4791 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4792 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4793 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4794 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4795 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4796 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4797 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4798 4799 /* check */ 4800 if (pcbddc->dbg_flag) { 4801 Vec x,x_change; 4802 PetscReal error; 4803 4804 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4805 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4806 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4807 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4808 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4809 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4810 if (!pcbddc->change_interior) { 4811 const PetscScalar *x,*y,*v; 4812 PetscReal lerror = 0.; 4813 PetscInt i; 4814 4815 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4816 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4817 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4818 for (i=0;i<local_size;i++) 4819 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4820 lerror = PetscAbsScalar(x[i]-y[i]); 4821 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4822 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4823 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4824 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4825 if (error > PETSC_SMALL) { 4826 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4827 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4828 } else { 4829 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4830 } 4831 } 4832 } 4833 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4834 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4835 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4836 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4837 if (error > PETSC_SMALL) { 4838 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4839 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4840 } else { 4841 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4842 } 4843 } 4844 ierr = VecDestroy(&x);CHKERRQ(ierr); 4845 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4846 } 4847 4848 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4849 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4850 4851 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4852 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4853 if (isseqaij) { 4854 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4855 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4856 if (lA) { 4857 Mat work; 4858 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4859 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4860 ierr = MatDestroy(&work);CHKERRQ(ierr); 4861 } 4862 } else { 4863 Mat work_mat; 4864 4865 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4866 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4867 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4868 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4869 if (lA) { 4870 Mat work; 4871 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4872 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4873 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4874 ierr = MatDestroy(&work);CHKERRQ(ierr); 4875 } 4876 } 4877 if (matis->A->symmetric_set) { 4878 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4879 #if !defined(PETSC_USE_COMPLEX) 4880 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4881 #endif 4882 } 4883 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4884 PetscFunctionReturn(0); 4885 } 4886 4887 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4888 { 4889 PC_IS* pcis = (PC_IS*)(pc->data); 4890 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4891 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4892 PetscInt *idx_R_local=NULL; 4893 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4894 PetscInt vbs,bs; 4895 PetscBT bitmask=NULL; 4896 PetscErrorCode ierr; 4897 4898 PetscFunctionBegin; 4899 /* 4900 No need to setup local scatters if 4901 - primal space is unchanged 4902 AND 4903 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4904 AND 4905 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4906 */ 4907 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4908 PetscFunctionReturn(0); 4909 } 4910 /* destroy old objects */ 4911 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4912 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4913 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4914 /* Set Non-overlapping dimensions */ 4915 n_B = pcis->n_B; 4916 n_D = pcis->n - n_B; 4917 n_vertices = pcbddc->n_vertices; 4918 4919 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4920 4921 /* create auxiliary bitmask and allocate workspace */ 4922 if (!sub_schurs || !sub_schurs->reuse_solver) { 4923 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4924 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4925 for (i=0;i<n_vertices;i++) { 4926 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4927 } 4928 4929 for (i=0, n_R=0; i<pcis->n; i++) { 4930 if (!PetscBTLookup(bitmask,i)) { 4931 idx_R_local[n_R++] = i; 4932 } 4933 } 4934 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4935 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4936 4937 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4938 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4939 } 4940 4941 /* Block code */ 4942 vbs = 1; 4943 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4944 if (bs>1 && !(n_vertices%bs)) { 4945 PetscBool is_blocked = PETSC_TRUE; 4946 PetscInt *vary; 4947 if (!sub_schurs || !sub_schurs->reuse_solver) { 4948 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4949 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4950 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4951 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4952 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4953 for (i=0; i<pcis->n/bs; i++) { 4954 if (vary[i]!=0 && vary[i]!=bs) { 4955 is_blocked = PETSC_FALSE; 4956 break; 4957 } 4958 } 4959 ierr = PetscFree(vary);CHKERRQ(ierr); 4960 } else { 4961 /* Verify directly the R set */ 4962 for (i=0; i<n_R/bs; i++) { 4963 PetscInt j,node=idx_R_local[bs*i]; 4964 for (j=1; j<bs; j++) { 4965 if (node != idx_R_local[bs*i+j]-j) { 4966 is_blocked = PETSC_FALSE; 4967 break; 4968 } 4969 } 4970 } 4971 } 4972 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4973 vbs = bs; 4974 for (i=0;i<n_R/vbs;i++) { 4975 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4976 } 4977 } 4978 } 4979 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4980 if (sub_schurs && sub_schurs->reuse_solver) { 4981 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4982 4983 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4984 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4985 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4986 reuse_solver->is_R = pcbddc->is_R_local; 4987 } else { 4988 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4989 } 4990 4991 /* print some info if requested */ 4992 if (pcbddc->dbg_flag) { 4993 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4994 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4995 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4996 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4997 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4998 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4999 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5000 } 5001 5002 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5003 if (!sub_schurs || !sub_schurs->reuse_solver) { 5004 IS is_aux1,is_aux2; 5005 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5006 5007 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5008 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5009 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5010 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5011 for (i=0; i<n_D; i++) { 5012 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5013 } 5014 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5015 for (i=0, j=0; i<n_R; i++) { 5016 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5017 aux_array1[j++] = i; 5018 } 5019 } 5020 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5021 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5022 for (i=0, j=0; i<n_B; i++) { 5023 if (!PetscBTLookup(bitmask,is_indices[i])) { 5024 aux_array2[j++] = i; 5025 } 5026 } 5027 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5028 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5029 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5030 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5031 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5032 5033 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5034 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5035 for (i=0, j=0; i<n_R; i++) { 5036 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5037 aux_array1[j++] = i; 5038 } 5039 } 5040 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5041 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5042 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5043 } 5044 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5045 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5046 } else { 5047 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5048 IS tis; 5049 PetscInt schur_size; 5050 5051 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5052 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5053 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5054 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5055 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5056 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5057 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5058 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5059 } 5060 } 5061 PetscFunctionReturn(0); 5062 } 5063 5064 5065 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5066 { 5067 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5068 PC_IS *pcis = (PC_IS*)pc->data; 5069 PC pc_temp; 5070 Mat A_RR; 5071 MatReuse reuse; 5072 PetscScalar m_one = -1.0; 5073 PetscReal value; 5074 PetscInt n_D,n_R; 5075 PetscBool check_corr,issbaij; 5076 PetscErrorCode ierr; 5077 /* prefixes stuff */ 5078 char dir_prefix[256],neu_prefix[256],str_level[16]; 5079 size_t len; 5080 5081 PetscFunctionBegin; 5082 5083 /* compute prefixes */ 5084 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5085 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5086 if (!pcbddc->current_level) { 5087 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5088 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5089 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5090 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5091 } else { 5092 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5093 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5094 len -= 15; /* remove "pc_bddc_coarse_" */ 5095 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5096 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5097 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5098 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5099 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5100 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5101 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 5102 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5103 } 5104 5105 /* DIRICHLET PROBLEM */ 5106 if (dirichlet) { 5107 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5108 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5109 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5110 if (pcbddc->dbg_flag) { 5111 Mat A_IIn; 5112 5113 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5114 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5115 pcis->A_II = A_IIn; 5116 } 5117 } 5118 if (pcbddc->local_mat->symmetric_set) { 5119 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5120 } 5121 /* Matrix for Dirichlet problem is pcis->A_II */ 5122 n_D = pcis->n - pcis->n_B; 5123 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5124 void (*f)(void) = 0; 5125 5126 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5127 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5128 /* default */ 5129 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5130 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5131 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5132 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5133 if (issbaij) { 5134 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5135 } else { 5136 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5137 } 5138 /* Allow user's customization */ 5139 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5140 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5141 if (f && pcbddc->mat_graph->cloc) { 5142 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5143 const PetscInt *idxs; 5144 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5145 5146 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5147 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5148 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5149 for (i=0;i<nl;i++) { 5150 for (d=0;d<cdim;d++) { 5151 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5152 } 5153 } 5154 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5155 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5156 ierr = PetscFree(scoords);CHKERRQ(ierr); 5157 } 5158 } 5159 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5160 if (sub_schurs && sub_schurs->reuse_solver) { 5161 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5162 5163 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5164 } 5165 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5166 if (!n_D) { 5167 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5168 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5169 } 5170 /* set ksp_D into pcis data */ 5171 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5172 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5173 pcis->ksp_D = pcbddc->ksp_D; 5174 } 5175 5176 /* NEUMANN PROBLEM */ 5177 A_RR = 0; 5178 if (neumann) { 5179 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5180 PetscInt ibs,mbs; 5181 PetscBool issbaij, reuse_neumann_solver; 5182 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5183 5184 reuse_neumann_solver = PETSC_FALSE; 5185 if (sub_schurs && sub_schurs->reuse_solver) { 5186 IS iP; 5187 5188 reuse_neumann_solver = PETSC_TRUE; 5189 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5190 if (iP) reuse_neumann_solver = PETSC_FALSE; 5191 } 5192 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5193 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5194 if (pcbddc->ksp_R) { /* already created ksp */ 5195 PetscInt nn_R; 5196 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5197 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5198 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5199 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5200 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5201 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5202 reuse = MAT_INITIAL_MATRIX; 5203 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5204 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5205 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5206 reuse = MAT_INITIAL_MATRIX; 5207 } else { /* safe to reuse the matrix */ 5208 reuse = MAT_REUSE_MATRIX; 5209 } 5210 } 5211 /* last check */ 5212 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5213 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5214 reuse = MAT_INITIAL_MATRIX; 5215 } 5216 } else { /* first time, so we need to create the matrix */ 5217 reuse = MAT_INITIAL_MATRIX; 5218 } 5219 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5220 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5221 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5222 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5223 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5224 if (matis->A == pcbddc->local_mat) { 5225 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5226 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5227 } else { 5228 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5229 } 5230 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5231 if (matis->A == pcbddc->local_mat) { 5232 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5233 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5234 } else { 5235 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5236 } 5237 } 5238 /* extract A_RR */ 5239 if (reuse_neumann_solver) { 5240 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5241 5242 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5243 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5244 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5245 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5246 } else { 5247 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5248 } 5249 } else { 5250 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5251 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5252 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5253 } 5254 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5255 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5256 } 5257 if (pcbddc->local_mat->symmetric_set) { 5258 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5259 } 5260 if (!pcbddc->ksp_R) { /* create object if not present */ 5261 void (*f)(void) = 0; 5262 5263 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5264 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5265 /* default */ 5266 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5267 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5268 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5269 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5270 if (issbaij) { 5271 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5272 } else { 5273 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5274 } 5275 /* Allow user's customization */ 5276 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5277 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5278 if (f && pcbddc->mat_graph->cloc) { 5279 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5280 const PetscInt *idxs; 5281 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5282 5283 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5284 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5285 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5286 for (i=0;i<nl;i++) { 5287 for (d=0;d<cdim;d++) { 5288 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5289 } 5290 } 5291 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5292 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5293 ierr = PetscFree(scoords);CHKERRQ(ierr); 5294 } 5295 } 5296 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5297 if (!n_R) { 5298 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5299 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5300 } 5301 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5302 /* Reuse solver if it is present */ 5303 if (reuse_neumann_solver) { 5304 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5305 5306 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5307 } 5308 } 5309 5310 if (pcbddc->dbg_flag) { 5311 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5312 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5314 } 5315 5316 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5317 check_corr = PETSC_FALSE; 5318 if (pcbddc->NullSpace_corr[0]) { 5319 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5320 } 5321 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5322 check_corr = PETSC_TRUE; 5323 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5324 } 5325 if (neumann && pcbddc->NullSpace_corr[2]) { 5326 check_corr = PETSC_TRUE; 5327 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5328 } 5329 /* check Dirichlet and Neumann solvers */ 5330 if (pcbddc->dbg_flag) { 5331 if (dirichlet) { /* Dirichlet */ 5332 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5333 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5334 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5335 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5336 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5338 if (check_corr) { 5339 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5340 } 5341 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5342 } 5343 if (neumann) { /* Neumann */ 5344 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5345 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5346 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5347 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5348 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5350 if (check_corr) { 5351 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5352 } 5353 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5354 } 5355 } 5356 /* free Neumann problem's matrix */ 5357 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5358 PetscFunctionReturn(0); 5359 } 5360 5361 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5362 { 5363 PetscErrorCode ierr; 5364 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5365 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5366 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5367 5368 PetscFunctionBegin; 5369 if (!reuse_solver) { 5370 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5371 } 5372 if (!pcbddc->switch_static) { 5373 if (applytranspose && pcbddc->local_auxmat1) { 5374 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5375 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5376 } 5377 if (!reuse_solver) { 5378 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5379 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5380 } else { 5381 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5382 5383 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5384 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5385 } 5386 } else { 5387 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5388 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5389 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5390 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5391 if (applytranspose && pcbddc->local_auxmat1) { 5392 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5393 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5394 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5395 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5396 } 5397 } 5398 if (!reuse_solver || pcbddc->switch_static) { 5399 if (applytranspose) { 5400 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5401 } else { 5402 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5403 } 5404 } else { 5405 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5406 5407 if (applytranspose) { 5408 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5409 } else { 5410 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5411 } 5412 } 5413 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5414 if (!pcbddc->switch_static) { 5415 if (!reuse_solver) { 5416 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5417 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5418 } else { 5419 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5420 5421 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5422 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5423 } 5424 if (!applytranspose && pcbddc->local_auxmat1) { 5425 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5426 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5427 } 5428 } else { 5429 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5430 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5431 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5432 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5433 if (!applytranspose && pcbddc->local_auxmat1) { 5434 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5435 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5436 } 5437 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5438 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5439 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5440 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5441 } 5442 PetscFunctionReturn(0); 5443 } 5444 5445 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5446 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5447 { 5448 PetscErrorCode ierr; 5449 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5450 PC_IS* pcis = (PC_IS*) (pc->data); 5451 const PetscScalar zero = 0.0; 5452 5453 PetscFunctionBegin; 5454 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5455 if (!pcbddc->benign_apply_coarse_only) { 5456 if (applytranspose) { 5457 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5458 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5459 } else { 5460 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5461 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5462 } 5463 } else { 5464 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5465 } 5466 5467 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5468 if (pcbddc->benign_n) { 5469 PetscScalar *array; 5470 PetscInt j; 5471 5472 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5473 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5474 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5475 } 5476 5477 /* start communications from local primal nodes to rhs of coarse solver */ 5478 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5479 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5480 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5481 5482 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5483 if (pcbddc->coarse_ksp) { 5484 Mat coarse_mat; 5485 Vec rhs,sol; 5486 MatNullSpace nullsp; 5487 PetscBool isbddc = PETSC_FALSE; 5488 5489 if (pcbddc->benign_have_null) { 5490 PC coarse_pc; 5491 5492 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5493 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5494 /* we need to propagate to coarser levels the need for a possible benign correction */ 5495 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5496 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5497 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5498 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5499 } 5500 } 5501 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5502 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5503 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5504 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5505 if (nullsp) { 5506 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5507 } 5508 if (applytranspose) { 5509 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5510 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5511 } else { 5512 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5513 PC coarse_pc; 5514 5515 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5516 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5517 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5518 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5519 } else { 5520 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5521 } 5522 } 5523 /* we don't need the benign correction at coarser levels anymore */ 5524 if (pcbddc->benign_have_null && isbddc) { 5525 PC coarse_pc; 5526 PC_BDDC* coarsepcbddc; 5527 5528 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5529 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5530 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5531 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5532 } 5533 if (nullsp) { 5534 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5535 } 5536 } 5537 5538 /* Local solution on R nodes */ 5539 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5540 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5541 } 5542 /* communications from coarse sol to local primal nodes */ 5543 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5544 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5545 5546 /* Sum contributions from the two levels */ 5547 if (!pcbddc->benign_apply_coarse_only) { 5548 if (applytranspose) { 5549 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5550 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5551 } else { 5552 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5553 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5554 } 5555 /* store p0 */ 5556 if (pcbddc->benign_n) { 5557 PetscScalar *array; 5558 PetscInt j; 5559 5560 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5561 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5562 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5563 } 5564 } else { /* expand the coarse solution */ 5565 if (applytranspose) { 5566 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5567 } else { 5568 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5569 } 5570 } 5571 PetscFunctionReturn(0); 5572 } 5573 5574 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5575 { 5576 PetscErrorCode ierr; 5577 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5578 PetscScalar *array; 5579 Vec from,to; 5580 5581 PetscFunctionBegin; 5582 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5583 from = pcbddc->coarse_vec; 5584 to = pcbddc->vec1_P; 5585 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5586 Vec tvec; 5587 5588 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5589 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5590 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5591 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5592 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5593 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5594 } 5595 } else { /* from local to global -> put data in coarse right hand side */ 5596 from = pcbddc->vec1_P; 5597 to = pcbddc->coarse_vec; 5598 } 5599 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5600 PetscFunctionReturn(0); 5601 } 5602 5603 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5604 { 5605 PetscErrorCode ierr; 5606 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5607 PetscScalar *array; 5608 Vec from,to; 5609 5610 PetscFunctionBegin; 5611 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5612 from = pcbddc->coarse_vec; 5613 to = pcbddc->vec1_P; 5614 } else { /* from local to global -> put data in coarse right hand side */ 5615 from = pcbddc->vec1_P; 5616 to = pcbddc->coarse_vec; 5617 } 5618 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5619 if (smode == SCATTER_FORWARD) { 5620 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5621 Vec tvec; 5622 5623 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5624 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5625 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5626 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5627 } 5628 } else { 5629 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5630 ierr = VecResetArray(from);CHKERRQ(ierr); 5631 } 5632 } 5633 PetscFunctionReturn(0); 5634 } 5635 5636 /* uncomment for testing purposes */ 5637 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5638 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5639 { 5640 PetscErrorCode ierr; 5641 PC_IS* pcis = (PC_IS*)(pc->data); 5642 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5643 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5644 /* one and zero */ 5645 PetscScalar one=1.0,zero=0.0; 5646 /* space to store constraints and their local indices */ 5647 PetscScalar *constraints_data; 5648 PetscInt *constraints_idxs,*constraints_idxs_B; 5649 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5650 PetscInt *constraints_n; 5651 /* iterators */ 5652 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5653 /* BLAS integers */ 5654 PetscBLASInt lwork,lierr; 5655 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5656 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5657 /* reuse */ 5658 PetscInt olocal_primal_size,olocal_primal_size_cc; 5659 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5660 /* change of basis */ 5661 PetscBool qr_needed; 5662 PetscBT change_basis,qr_needed_idx; 5663 /* auxiliary stuff */ 5664 PetscInt *nnz,*is_indices; 5665 PetscInt ncc; 5666 /* some quantities */ 5667 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5668 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5669 PetscReal tol; /* tolerance for retaining eigenmodes */ 5670 5671 PetscFunctionBegin; 5672 tol = PetscSqrtReal(PETSC_SMALL); 5673 /* Destroy Mat objects computed previously */ 5674 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5675 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5676 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5677 /* save info on constraints from previous setup (if any) */ 5678 olocal_primal_size = pcbddc->local_primal_size; 5679 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5680 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5681 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5682 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5683 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5684 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5685 5686 if (!pcbddc->adaptive_selection) { 5687 IS ISForVertices,*ISForFaces,*ISForEdges; 5688 MatNullSpace nearnullsp; 5689 const Vec *nearnullvecs; 5690 Vec *localnearnullsp; 5691 PetscScalar *array; 5692 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5693 PetscBool nnsp_has_cnst; 5694 /* LAPACK working arrays for SVD or POD */ 5695 PetscBool skip_lapack,boolforchange; 5696 PetscScalar *work; 5697 PetscReal *singular_vals; 5698 #if defined(PETSC_USE_COMPLEX) 5699 PetscReal *rwork; 5700 #endif 5701 #if defined(PETSC_MISSING_LAPACK_GESVD) 5702 PetscScalar *temp_basis,*correlation_mat; 5703 #else 5704 PetscBLASInt dummy_int=1; 5705 PetscScalar dummy_scalar=1.; 5706 #endif 5707 5708 /* Get index sets for faces, edges and vertices from graph */ 5709 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5710 /* print some info */ 5711 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5712 PetscInt nv; 5713 5714 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5715 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5716 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5717 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5718 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5719 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5720 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5721 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5722 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5723 } 5724 5725 /* free unneeded index sets */ 5726 if (!pcbddc->use_vertices) { 5727 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5728 } 5729 if (!pcbddc->use_edges) { 5730 for (i=0;i<n_ISForEdges;i++) { 5731 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5732 } 5733 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5734 n_ISForEdges = 0; 5735 } 5736 if (!pcbddc->use_faces) { 5737 for (i=0;i<n_ISForFaces;i++) { 5738 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5739 } 5740 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5741 n_ISForFaces = 0; 5742 } 5743 5744 /* check if near null space is attached to global mat */ 5745 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5746 if (nearnullsp) { 5747 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5748 /* remove any stored info */ 5749 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5750 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5751 /* store information for BDDC solver reuse */ 5752 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5753 pcbddc->onearnullspace = nearnullsp; 5754 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5755 for (i=0;i<nnsp_size;i++) { 5756 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5757 } 5758 } else { /* if near null space is not provided BDDC uses constants by default */ 5759 nnsp_size = 0; 5760 nnsp_has_cnst = PETSC_TRUE; 5761 } 5762 /* get max number of constraints on a single cc */ 5763 max_constraints = nnsp_size; 5764 if (nnsp_has_cnst) max_constraints++; 5765 5766 /* 5767 Evaluate maximum storage size needed by the procedure 5768 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5769 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5770 There can be multiple constraints per connected component 5771 */ 5772 n_vertices = 0; 5773 if (ISForVertices) { 5774 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5775 } 5776 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5777 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5778 5779 total_counts = n_ISForFaces+n_ISForEdges; 5780 total_counts *= max_constraints; 5781 total_counts += n_vertices; 5782 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5783 5784 total_counts = 0; 5785 max_size_of_constraint = 0; 5786 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5787 IS used_is; 5788 if (i<n_ISForEdges) { 5789 used_is = ISForEdges[i]; 5790 } else { 5791 used_is = ISForFaces[i-n_ISForEdges]; 5792 } 5793 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5794 total_counts += j; 5795 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5796 } 5797 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5798 5799 /* get local part of global near null space vectors */ 5800 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5801 for (k=0;k<nnsp_size;k++) { 5802 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5803 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5804 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5805 } 5806 5807 /* whether or not to skip lapack calls */ 5808 skip_lapack = PETSC_TRUE; 5809 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5810 5811 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5812 if (!skip_lapack) { 5813 PetscScalar temp_work; 5814 5815 #if defined(PETSC_MISSING_LAPACK_GESVD) 5816 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5817 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5818 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5819 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5820 #if defined(PETSC_USE_COMPLEX) 5821 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5822 #endif 5823 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5824 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5825 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5826 lwork = -1; 5827 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5828 #if !defined(PETSC_USE_COMPLEX) 5829 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5830 #else 5831 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5832 #endif 5833 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5834 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5835 #else /* on missing GESVD */ 5836 /* SVD */ 5837 PetscInt max_n,min_n; 5838 max_n = max_size_of_constraint; 5839 min_n = max_constraints; 5840 if (max_size_of_constraint < max_constraints) { 5841 min_n = max_size_of_constraint; 5842 max_n = max_constraints; 5843 } 5844 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5845 #if defined(PETSC_USE_COMPLEX) 5846 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5847 #endif 5848 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5849 lwork = -1; 5850 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5851 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5852 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5853 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5854 #if !defined(PETSC_USE_COMPLEX) 5855 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5856 #else 5857 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5858 #endif 5859 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5860 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5861 #endif /* on missing GESVD */ 5862 /* Allocate optimal workspace */ 5863 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5864 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5865 } 5866 /* Now we can loop on constraining sets */ 5867 total_counts = 0; 5868 constraints_idxs_ptr[0] = 0; 5869 constraints_data_ptr[0] = 0; 5870 /* vertices */ 5871 if (n_vertices) { 5872 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5873 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5874 for (i=0;i<n_vertices;i++) { 5875 constraints_n[total_counts] = 1; 5876 constraints_data[total_counts] = 1.0; 5877 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5878 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5879 total_counts++; 5880 } 5881 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5882 n_vertices = total_counts; 5883 } 5884 5885 /* edges and faces */ 5886 total_counts_cc = total_counts; 5887 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5888 IS used_is; 5889 PetscBool idxs_copied = PETSC_FALSE; 5890 5891 if (ncc<n_ISForEdges) { 5892 used_is = ISForEdges[ncc]; 5893 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5894 } else { 5895 used_is = ISForFaces[ncc-n_ISForEdges]; 5896 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5897 } 5898 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5899 5900 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5901 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5902 /* change of basis should not be performed on local periodic nodes */ 5903 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5904 if (nnsp_has_cnst) { 5905 PetscScalar quad_value; 5906 5907 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5908 idxs_copied = PETSC_TRUE; 5909 5910 if (!pcbddc->use_nnsp_true) { 5911 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5912 } else { 5913 quad_value = 1.0; 5914 } 5915 for (j=0;j<size_of_constraint;j++) { 5916 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5917 } 5918 temp_constraints++; 5919 total_counts++; 5920 } 5921 for (k=0;k<nnsp_size;k++) { 5922 PetscReal real_value; 5923 PetscScalar *ptr_to_data; 5924 5925 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5926 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5927 for (j=0;j<size_of_constraint;j++) { 5928 ptr_to_data[j] = array[is_indices[j]]; 5929 } 5930 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5931 /* check if array is null on the connected component */ 5932 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5933 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5934 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 5935 temp_constraints++; 5936 total_counts++; 5937 if (!idxs_copied) { 5938 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5939 idxs_copied = PETSC_TRUE; 5940 } 5941 } 5942 } 5943 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5944 valid_constraints = temp_constraints; 5945 if (!pcbddc->use_nnsp_true && temp_constraints) { 5946 if (temp_constraints == 1) { /* just normalize the constraint */ 5947 PetscScalar norm,*ptr_to_data; 5948 5949 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5950 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5951 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5952 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5953 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5954 } else { /* perform SVD */ 5955 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5956 5957 #if defined(PETSC_MISSING_LAPACK_GESVD) 5958 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5959 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5960 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5961 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5962 from that computed using LAPACKgesvd 5963 -> This is due to a different computation of eigenvectors in LAPACKheev 5964 -> The quality of the POD-computed basis will be the same */ 5965 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5966 /* Store upper triangular part of correlation matrix */ 5967 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5968 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5969 for (j=0;j<temp_constraints;j++) { 5970 for (k=0;k<j+1;k++) { 5971 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5972 } 5973 } 5974 /* compute eigenvalues and eigenvectors of correlation matrix */ 5975 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5976 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5977 #if !defined(PETSC_USE_COMPLEX) 5978 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5979 #else 5980 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5981 #endif 5982 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5983 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5984 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5985 j = 0; 5986 while (j < temp_constraints && singular_vals[j] < tol) j++; 5987 total_counts = total_counts-j; 5988 valid_constraints = temp_constraints-j; 5989 /* scale and copy POD basis into used quadrature memory */ 5990 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5991 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5992 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5993 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5994 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5995 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5996 if (j<temp_constraints) { 5997 PetscInt ii; 5998 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5999 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6000 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6001 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6002 for (k=0;k<temp_constraints-j;k++) { 6003 for (ii=0;ii<size_of_constraint;ii++) { 6004 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6005 } 6006 } 6007 } 6008 #else /* on missing GESVD */ 6009 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6010 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6011 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6012 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6013 #if !defined(PETSC_USE_COMPLEX) 6014 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6015 #else 6016 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6017 #endif 6018 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6019 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6020 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6021 k = temp_constraints; 6022 if (k > size_of_constraint) k = size_of_constraint; 6023 j = 0; 6024 while (j < k && singular_vals[k-j-1] < tol) j++; 6025 valid_constraints = k-j; 6026 total_counts = total_counts-temp_constraints+valid_constraints; 6027 #endif /* on missing GESVD */ 6028 } 6029 } 6030 /* update pointers information */ 6031 if (valid_constraints) { 6032 constraints_n[total_counts_cc] = valid_constraints; 6033 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6034 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6035 /* set change_of_basis flag */ 6036 if (boolforchange) { 6037 PetscBTSet(change_basis,total_counts_cc); 6038 } 6039 total_counts_cc++; 6040 } 6041 } 6042 /* free workspace */ 6043 if (!skip_lapack) { 6044 ierr = PetscFree(work);CHKERRQ(ierr); 6045 #if defined(PETSC_USE_COMPLEX) 6046 ierr = PetscFree(rwork);CHKERRQ(ierr); 6047 #endif 6048 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6049 #if defined(PETSC_MISSING_LAPACK_GESVD) 6050 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6051 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6052 #endif 6053 } 6054 for (k=0;k<nnsp_size;k++) { 6055 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6056 } 6057 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6058 /* free index sets of faces, edges and vertices */ 6059 for (i=0;i<n_ISForFaces;i++) { 6060 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6061 } 6062 if (n_ISForFaces) { 6063 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6064 } 6065 for (i=0;i<n_ISForEdges;i++) { 6066 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6067 } 6068 if (n_ISForEdges) { 6069 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6070 } 6071 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6072 } else { 6073 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6074 6075 total_counts = 0; 6076 n_vertices = 0; 6077 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6078 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6079 } 6080 max_constraints = 0; 6081 total_counts_cc = 0; 6082 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6083 total_counts += pcbddc->adaptive_constraints_n[i]; 6084 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6085 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6086 } 6087 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6088 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6089 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6090 constraints_data = pcbddc->adaptive_constraints_data; 6091 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6092 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6093 total_counts_cc = 0; 6094 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6095 if (pcbddc->adaptive_constraints_n[i]) { 6096 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6097 } 6098 } 6099 #if 0 6100 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6101 for (i=0;i<total_counts_cc;i++) { 6102 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6103 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6104 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6105 printf(" %d",constraints_idxs[j]); 6106 } 6107 printf("\n"); 6108 printf("number of cc: %d\n",constraints_n[i]); 6109 } 6110 for (i=0;i<n_vertices;i++) { 6111 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6112 } 6113 for (i=0;i<sub_schurs->n_subs;i++) { 6114 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6115 } 6116 #endif 6117 6118 max_size_of_constraint = 0; 6119 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6120 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6121 /* Change of basis */ 6122 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6123 if (pcbddc->use_change_of_basis) { 6124 for (i=0;i<sub_schurs->n_subs;i++) { 6125 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6126 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6127 } 6128 } 6129 } 6130 } 6131 pcbddc->local_primal_size = total_counts; 6132 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6133 6134 /* map constraints_idxs in boundary numbering */ 6135 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6136 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6137 6138 /* Create constraint matrix */ 6139 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6140 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6141 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6142 6143 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6144 /* determine if a QR strategy is needed for change of basis */ 6145 qr_needed = PETSC_FALSE; 6146 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6147 total_primal_vertices=0; 6148 pcbddc->local_primal_size_cc = 0; 6149 for (i=0;i<total_counts_cc;i++) { 6150 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6151 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6152 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6153 pcbddc->local_primal_size_cc += 1; 6154 } else if (PetscBTLookup(change_basis,i)) { 6155 for (k=0;k<constraints_n[i];k++) { 6156 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6157 } 6158 pcbddc->local_primal_size_cc += constraints_n[i]; 6159 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6160 PetscBTSet(qr_needed_idx,i); 6161 qr_needed = PETSC_TRUE; 6162 } 6163 } else { 6164 pcbddc->local_primal_size_cc += 1; 6165 } 6166 } 6167 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6168 pcbddc->n_vertices = total_primal_vertices; 6169 /* permute indices in order to have a sorted set of vertices */ 6170 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6171 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6172 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6173 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6174 6175 /* nonzero structure of constraint matrix */ 6176 /* and get reference dof for local constraints */ 6177 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6178 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6179 6180 j = total_primal_vertices; 6181 total_counts = total_primal_vertices; 6182 cum = total_primal_vertices; 6183 for (i=n_vertices;i<total_counts_cc;i++) { 6184 if (!PetscBTLookup(change_basis,i)) { 6185 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6186 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6187 cum++; 6188 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6189 for (k=0;k<constraints_n[i];k++) { 6190 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6191 nnz[j+k] = size_of_constraint; 6192 } 6193 j += constraints_n[i]; 6194 } 6195 } 6196 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6197 ierr = PetscFree(nnz);CHKERRQ(ierr); 6198 6199 /* set values in constraint matrix */ 6200 for (i=0;i<total_primal_vertices;i++) { 6201 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6202 } 6203 total_counts = total_primal_vertices; 6204 for (i=n_vertices;i<total_counts_cc;i++) { 6205 if (!PetscBTLookup(change_basis,i)) { 6206 PetscInt *cols; 6207 6208 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6209 cols = constraints_idxs+constraints_idxs_ptr[i]; 6210 for (k=0;k<constraints_n[i];k++) { 6211 PetscInt row = total_counts+k; 6212 PetscScalar *vals; 6213 6214 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6215 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6216 } 6217 total_counts += constraints_n[i]; 6218 } 6219 } 6220 /* assembling */ 6221 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6222 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6223 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6224 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6225 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6226 6227 /* 6228 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6229 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6230 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6231 */ 6232 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6233 if (pcbddc->use_change_of_basis) { 6234 /* dual and primal dofs on a single cc */ 6235 PetscInt dual_dofs,primal_dofs; 6236 /* working stuff for GEQRF */ 6237 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6238 PetscBLASInt lqr_work; 6239 /* working stuff for UNGQR */ 6240 PetscScalar *gqr_work,lgqr_work_t; 6241 PetscBLASInt lgqr_work; 6242 /* working stuff for TRTRS */ 6243 PetscScalar *trs_rhs; 6244 PetscBLASInt Blas_NRHS; 6245 /* pointers for values insertion into change of basis matrix */ 6246 PetscInt *start_rows,*start_cols; 6247 PetscScalar *start_vals; 6248 /* working stuff for values insertion */ 6249 PetscBT is_primal; 6250 PetscInt *aux_primal_numbering_B; 6251 /* matrix sizes */ 6252 PetscInt global_size,local_size; 6253 /* temporary change of basis */ 6254 Mat localChangeOfBasisMatrix; 6255 /* extra space for debugging */ 6256 PetscScalar *dbg_work; 6257 6258 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6259 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6260 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6261 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6262 /* nonzeros for local mat */ 6263 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6264 if (!pcbddc->benign_change || pcbddc->fake_change) { 6265 for (i=0;i<pcis->n;i++) nnz[i]=1; 6266 } else { 6267 const PetscInt *ii; 6268 PetscInt n; 6269 PetscBool flg_row; 6270 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6271 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6272 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6273 } 6274 for (i=n_vertices;i<total_counts_cc;i++) { 6275 if (PetscBTLookup(change_basis,i)) { 6276 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6277 if (PetscBTLookup(qr_needed_idx,i)) { 6278 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6279 } else { 6280 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6281 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6282 } 6283 } 6284 } 6285 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6286 ierr = PetscFree(nnz);CHKERRQ(ierr); 6287 /* Set interior change in the matrix */ 6288 if (!pcbddc->benign_change || pcbddc->fake_change) { 6289 for (i=0;i<pcis->n;i++) { 6290 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6291 } 6292 } else { 6293 const PetscInt *ii,*jj; 6294 PetscScalar *aa; 6295 PetscInt n; 6296 PetscBool flg_row; 6297 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6298 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6299 for (i=0;i<n;i++) { 6300 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6301 } 6302 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6303 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6304 } 6305 6306 if (pcbddc->dbg_flag) { 6307 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6308 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6309 } 6310 6311 6312 /* Now we loop on the constraints which need a change of basis */ 6313 /* 6314 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6315 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6316 6317 Basic blocks of change of basis matrix T computed by 6318 6319 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6320 6321 | 1 0 ... 0 s_1/S | 6322 | 0 1 ... 0 s_2/S | 6323 | ... | 6324 | 0 ... 1 s_{n-1}/S | 6325 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6326 6327 with S = \sum_{i=1}^n s_i^2 6328 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6329 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6330 6331 - QR decomposition of constraints otherwise 6332 */ 6333 if (qr_needed) { 6334 /* space to store Q */ 6335 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6336 /* array to store scaling factors for reflectors */ 6337 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6338 /* first we issue queries for optimal work */ 6339 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6340 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6341 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6342 lqr_work = -1; 6343 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6344 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6345 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6346 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6347 lgqr_work = -1; 6348 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6349 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6350 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6351 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6352 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6353 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6354 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6355 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6356 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6357 /* array to store rhs and solution of triangular solver */ 6358 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6359 /* allocating workspace for check */ 6360 if (pcbddc->dbg_flag) { 6361 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6362 } 6363 } 6364 /* array to store whether a node is primal or not */ 6365 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6366 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6367 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6368 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6369 for (i=0;i<total_primal_vertices;i++) { 6370 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6371 } 6372 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6373 6374 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6375 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6376 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6377 if (PetscBTLookup(change_basis,total_counts)) { 6378 /* get constraint info */ 6379 primal_dofs = constraints_n[total_counts]; 6380 dual_dofs = size_of_constraint-primal_dofs; 6381 6382 if (pcbddc->dbg_flag) { 6383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6384 } 6385 6386 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6387 6388 /* copy quadrature constraints for change of basis check */ 6389 if (pcbddc->dbg_flag) { 6390 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6391 } 6392 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6393 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6394 6395 /* compute QR decomposition of constraints */ 6396 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6397 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6398 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6399 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6400 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6401 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6402 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6403 6404 /* explictly compute R^-T */ 6405 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6406 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6407 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6408 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6409 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6410 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6411 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6412 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6413 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6414 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6415 6416 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6417 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6418 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6419 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6420 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6421 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6422 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6423 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6424 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6425 6426 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6427 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6428 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6429 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6430 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6431 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6432 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6433 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6434 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6435 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6436 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6438 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6439 6440 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6441 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6442 /* insert cols for primal dofs */ 6443 for (j=0;j<primal_dofs;j++) { 6444 start_vals = &qr_basis[j*size_of_constraint]; 6445 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6446 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6447 } 6448 /* insert cols for dual dofs */ 6449 for (j=0,k=0;j<dual_dofs;k++) { 6450 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6451 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6452 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6453 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6454 j++; 6455 } 6456 } 6457 6458 /* check change of basis */ 6459 if (pcbddc->dbg_flag) { 6460 PetscInt ii,jj; 6461 PetscBool valid_qr=PETSC_TRUE; 6462 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6463 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6464 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6465 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6466 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6467 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6468 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6469 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6470 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6471 for (jj=0;jj<size_of_constraint;jj++) { 6472 for (ii=0;ii<primal_dofs;ii++) { 6473 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6474 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6475 } 6476 } 6477 if (!valid_qr) { 6478 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6479 for (jj=0;jj<size_of_constraint;jj++) { 6480 for (ii=0;ii<primal_dofs;ii++) { 6481 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6482 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6483 } 6484 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6485 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6486 } 6487 } 6488 } 6489 } else { 6490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6491 } 6492 } 6493 } else { /* simple transformation block */ 6494 PetscInt row,col; 6495 PetscScalar val,norm; 6496 6497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6498 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6499 for (j=0;j<size_of_constraint;j++) { 6500 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6501 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6502 if (!PetscBTLookup(is_primal,row_B)) { 6503 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6504 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6505 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6506 } else { 6507 for (k=0;k<size_of_constraint;k++) { 6508 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6509 if (row != col) { 6510 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6511 } else { 6512 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6513 } 6514 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6515 } 6516 } 6517 } 6518 if (pcbddc->dbg_flag) { 6519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6520 } 6521 } 6522 } else { 6523 if (pcbddc->dbg_flag) { 6524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6525 } 6526 } 6527 } 6528 6529 /* free workspace */ 6530 if (qr_needed) { 6531 if (pcbddc->dbg_flag) { 6532 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6533 } 6534 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6535 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6536 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6537 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6538 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6539 } 6540 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6541 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6542 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6543 6544 /* assembling of global change of variable */ 6545 if (!pcbddc->fake_change) { 6546 Mat tmat; 6547 PetscInt bs; 6548 6549 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6550 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6551 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6552 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6553 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6554 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6555 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6556 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6557 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6558 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6559 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6560 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6561 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6562 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6563 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6564 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6565 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6566 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6567 6568 /* check */ 6569 if (pcbddc->dbg_flag) { 6570 PetscReal error; 6571 Vec x,x_change; 6572 6573 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6574 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6575 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6576 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6577 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6578 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6579 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6580 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6581 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6582 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6583 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6584 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6585 if (error > PETSC_SMALL) { 6586 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6587 } 6588 ierr = VecDestroy(&x);CHKERRQ(ierr); 6589 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6590 } 6591 /* adapt sub_schurs computed (if any) */ 6592 if (pcbddc->use_deluxe_scaling) { 6593 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6594 6595 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6596 if (sub_schurs && sub_schurs->S_Ej_all) { 6597 Mat S_new,tmat; 6598 IS is_all_N,is_V_Sall = NULL; 6599 6600 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6601 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6602 if (pcbddc->deluxe_zerorows) { 6603 ISLocalToGlobalMapping NtoSall; 6604 IS is_V; 6605 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6606 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6607 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6608 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6609 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6610 } 6611 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6612 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6613 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6614 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6615 if (pcbddc->deluxe_zerorows) { 6616 const PetscScalar *array; 6617 const PetscInt *idxs_V,*idxs_all; 6618 PetscInt i,n_V; 6619 6620 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6621 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6622 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6623 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6624 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6625 for (i=0;i<n_V;i++) { 6626 PetscScalar val; 6627 PetscInt idx; 6628 6629 idx = idxs_V[i]; 6630 val = array[idxs_all[idxs_V[i]]]; 6631 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6632 } 6633 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6634 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6635 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6636 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6637 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6638 } 6639 sub_schurs->S_Ej_all = S_new; 6640 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6641 if (sub_schurs->sum_S_Ej_all) { 6642 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6643 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6644 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6645 if (pcbddc->deluxe_zerorows) { 6646 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6647 } 6648 sub_schurs->sum_S_Ej_all = S_new; 6649 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6650 } 6651 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6652 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6653 } 6654 /* destroy any change of basis context in sub_schurs */ 6655 if (sub_schurs && sub_schurs->change) { 6656 PetscInt i; 6657 6658 for (i=0;i<sub_schurs->n_subs;i++) { 6659 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6660 } 6661 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6662 } 6663 } 6664 if (pcbddc->switch_static) { /* need to save the local change */ 6665 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6666 } else { 6667 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6668 } 6669 /* determine if any process has changed the pressures locally */ 6670 pcbddc->change_interior = pcbddc->benign_have_null; 6671 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6672 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6673 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6674 pcbddc->use_qr_single = qr_needed; 6675 } 6676 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6677 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6678 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6679 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6680 } else { 6681 Mat benign_global = NULL; 6682 if (pcbddc->benign_have_null) { 6683 Mat tmat; 6684 6685 pcbddc->change_interior = PETSC_TRUE; 6686 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6687 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6688 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6689 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6690 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6691 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6692 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6693 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6694 if (pcbddc->benign_change) { 6695 Mat M; 6696 6697 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6698 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6699 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6700 ierr = MatDestroy(&M);CHKERRQ(ierr); 6701 } else { 6702 Mat eye; 6703 PetscScalar *array; 6704 6705 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6706 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6707 for (i=0;i<pcis->n;i++) { 6708 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6709 } 6710 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6711 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6712 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6713 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6714 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6715 } 6716 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6717 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6718 } 6719 if (pcbddc->user_ChangeOfBasisMatrix) { 6720 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6721 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6722 } else if (pcbddc->benign_have_null) { 6723 pcbddc->ChangeOfBasisMatrix = benign_global; 6724 } 6725 } 6726 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6727 IS is_global; 6728 const PetscInt *gidxs; 6729 6730 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6731 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6732 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6733 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6734 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6735 } 6736 } 6737 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6738 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6739 } 6740 6741 if (!pcbddc->fake_change) { 6742 /* add pressure dofs to set of primal nodes for numbering purposes */ 6743 for (i=0;i<pcbddc->benign_n;i++) { 6744 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6745 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6746 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6747 pcbddc->local_primal_size_cc++; 6748 pcbddc->local_primal_size++; 6749 } 6750 6751 /* check if a new primal space has been introduced (also take into account benign trick) */ 6752 pcbddc->new_primal_space_local = PETSC_TRUE; 6753 if (olocal_primal_size == pcbddc->local_primal_size) { 6754 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6755 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6756 if (!pcbddc->new_primal_space_local) { 6757 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6758 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6759 } 6760 } 6761 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6762 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6763 } 6764 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6765 6766 /* flush dbg viewer */ 6767 if (pcbddc->dbg_flag) { 6768 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6769 } 6770 6771 /* free workspace */ 6772 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6773 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6774 if (!pcbddc->adaptive_selection) { 6775 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6776 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6777 } else { 6778 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6779 pcbddc->adaptive_constraints_idxs_ptr, 6780 pcbddc->adaptive_constraints_data_ptr, 6781 pcbddc->adaptive_constraints_idxs, 6782 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6783 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6784 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6785 } 6786 PetscFunctionReturn(0); 6787 } 6788 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6789 6790 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6791 { 6792 ISLocalToGlobalMapping map; 6793 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6794 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6795 PetscInt i,N; 6796 PetscBool rcsr = PETSC_FALSE; 6797 PetscErrorCode ierr; 6798 6799 PetscFunctionBegin; 6800 if (pcbddc->recompute_topography) { 6801 pcbddc->graphanalyzed = PETSC_FALSE; 6802 /* Reset previously computed graph */ 6803 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6804 /* Init local Graph struct */ 6805 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6806 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6807 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6808 6809 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6810 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6811 } 6812 /* Check validity of the csr graph passed in by the user */ 6813 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6814 6815 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6816 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6817 PetscInt *xadj,*adjncy; 6818 PetscInt nvtxs; 6819 PetscBool flg_row=PETSC_FALSE; 6820 6821 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6822 if (flg_row) { 6823 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6824 pcbddc->computed_rowadj = PETSC_TRUE; 6825 } 6826 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6827 rcsr = PETSC_TRUE; 6828 } 6829 if (pcbddc->dbg_flag) { 6830 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6831 } 6832 6833 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6834 PetscReal *lcoords; 6835 PetscInt n; 6836 MPI_Datatype dimrealtype; 6837 6838 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6839 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6840 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6841 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6842 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6843 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6844 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6845 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6846 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6847 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6848 6849 pcbddc->mat_graph->coords = lcoords; 6850 pcbddc->mat_graph->cloc = PETSC_TRUE; 6851 pcbddc->mat_graph->cnloc = n; 6852 } 6853 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6854 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6855 6856 /* Setup of Graph */ 6857 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6858 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6859 6860 /* attach info on disconnected subdomains if present */ 6861 if (pcbddc->n_local_subs) { 6862 PetscInt *local_subs; 6863 6864 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6865 for (i=0;i<pcbddc->n_local_subs;i++) { 6866 const PetscInt *idxs; 6867 PetscInt nl,j; 6868 6869 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6870 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6871 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6872 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6873 } 6874 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6875 pcbddc->mat_graph->local_subs = local_subs; 6876 } 6877 } 6878 6879 if (!pcbddc->graphanalyzed) { 6880 /* Graph's connected components analysis */ 6881 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6882 pcbddc->graphanalyzed = PETSC_TRUE; 6883 } 6884 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6885 PetscFunctionReturn(0); 6886 } 6887 6888 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6889 { 6890 PetscInt i,j; 6891 PetscScalar *alphas; 6892 PetscErrorCode ierr; 6893 6894 PetscFunctionBegin; 6895 if (!n) PetscFunctionReturn(0); 6896 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6897 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6898 for (i=1;i<n;i++) { 6899 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6900 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6901 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6902 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6903 } 6904 ierr = PetscFree(alphas);CHKERRQ(ierr); 6905 PetscFunctionReturn(0); 6906 } 6907 6908 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6909 { 6910 Mat A; 6911 PetscInt n_neighs,*neighs,*n_shared,**shared; 6912 PetscMPIInt size,rank,color; 6913 PetscInt *xadj,*adjncy; 6914 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6915 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6916 PetscInt void_procs,*procs_candidates = NULL; 6917 PetscInt xadj_count,*count; 6918 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6919 PetscSubcomm psubcomm; 6920 MPI_Comm subcomm; 6921 PetscErrorCode ierr; 6922 6923 PetscFunctionBegin; 6924 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6925 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6926 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6927 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6928 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6929 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6930 6931 if (have_void) *have_void = PETSC_FALSE; 6932 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6933 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6934 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6935 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6936 im_active = !!n; 6937 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6938 void_procs = size - active_procs; 6939 /* get ranks of of non-active processes in mat communicator */ 6940 if (void_procs) { 6941 PetscInt ncand; 6942 6943 if (have_void) *have_void = PETSC_TRUE; 6944 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6945 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6946 for (i=0,ncand=0;i<size;i++) { 6947 if (!procs_candidates[i]) { 6948 procs_candidates[ncand++] = i; 6949 } 6950 } 6951 /* force n_subdomains to be not greater that the number of non-active processes */ 6952 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6953 } 6954 6955 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6956 number of subdomains requested 1 -> send to master or first candidate in voids */ 6957 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6958 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6959 PetscInt issize,isidx,dest; 6960 if (*n_subdomains == 1) dest = 0; 6961 else dest = rank; 6962 if (im_active) { 6963 issize = 1; 6964 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6965 isidx = procs_candidates[dest]; 6966 } else { 6967 isidx = dest; 6968 } 6969 } else { 6970 issize = 0; 6971 isidx = -1; 6972 } 6973 if (*n_subdomains != 1) *n_subdomains = active_procs; 6974 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6975 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6976 PetscFunctionReturn(0); 6977 } 6978 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6979 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6980 threshold = PetscMax(threshold,2); 6981 6982 /* Get info on mapping */ 6983 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6984 6985 /* build local CSR graph of subdomains' connectivity */ 6986 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6987 xadj[0] = 0; 6988 xadj[1] = PetscMax(n_neighs-1,0); 6989 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6990 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6991 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6992 for (i=1;i<n_neighs;i++) 6993 for (j=0;j<n_shared[i];j++) 6994 count[shared[i][j]] += 1; 6995 6996 xadj_count = 0; 6997 for (i=1;i<n_neighs;i++) { 6998 for (j=0;j<n_shared[i];j++) { 6999 if (count[shared[i][j]] < threshold) { 7000 adjncy[xadj_count] = neighs[i]; 7001 adjncy_wgt[xadj_count] = n_shared[i]; 7002 xadj_count++; 7003 break; 7004 } 7005 } 7006 } 7007 xadj[1] = xadj_count; 7008 ierr = PetscFree(count);CHKERRQ(ierr); 7009 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7010 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7011 7012 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7013 7014 /* Restrict work on active processes only */ 7015 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7016 if (void_procs) { 7017 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7018 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7019 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7020 subcomm = PetscSubcommChild(psubcomm); 7021 } else { 7022 psubcomm = NULL; 7023 subcomm = PetscObjectComm((PetscObject)mat); 7024 } 7025 7026 v_wgt = NULL; 7027 if (!color) { 7028 ierr = PetscFree(xadj);CHKERRQ(ierr); 7029 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7030 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7031 } else { 7032 Mat subdomain_adj; 7033 IS new_ranks,new_ranks_contig; 7034 MatPartitioning partitioner; 7035 PetscInt rstart=0,rend=0; 7036 PetscInt *is_indices,*oldranks; 7037 PetscMPIInt size; 7038 PetscBool aggregate; 7039 7040 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7041 if (void_procs) { 7042 PetscInt prank = rank; 7043 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7044 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7045 for (i=0;i<xadj[1];i++) { 7046 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7047 } 7048 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7049 } else { 7050 oldranks = NULL; 7051 } 7052 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7053 if (aggregate) { /* TODO: all this part could be made more efficient */ 7054 PetscInt lrows,row,ncols,*cols; 7055 PetscMPIInt nrank; 7056 PetscScalar *vals; 7057 7058 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7059 lrows = 0; 7060 if (nrank<redprocs) { 7061 lrows = size/redprocs; 7062 if (nrank<size%redprocs) lrows++; 7063 } 7064 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7065 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7066 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7067 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7068 row = nrank; 7069 ncols = xadj[1]-xadj[0]; 7070 cols = adjncy; 7071 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7072 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7073 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7074 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7075 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7076 ierr = PetscFree(xadj);CHKERRQ(ierr); 7077 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7078 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7079 ierr = PetscFree(vals);CHKERRQ(ierr); 7080 if (use_vwgt) { 7081 Vec v; 7082 const PetscScalar *array; 7083 PetscInt nl; 7084 7085 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7086 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7087 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7088 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7089 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7090 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7091 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7092 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7093 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7094 ierr = VecDestroy(&v);CHKERRQ(ierr); 7095 } 7096 } else { 7097 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7098 if (use_vwgt) { 7099 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7100 v_wgt[0] = n; 7101 } 7102 } 7103 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7104 7105 /* Partition */ 7106 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7107 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7108 if (v_wgt) { 7109 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7110 } 7111 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7112 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7113 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7114 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7115 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7116 7117 /* renumber new_ranks to avoid "holes" in new set of processors */ 7118 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7119 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7120 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7121 if (!aggregate) { 7122 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7123 #if defined(PETSC_USE_DEBUG) 7124 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7125 #endif 7126 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7127 } else if (oldranks) { 7128 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7129 } else { 7130 ranks_send_to_idx[0] = is_indices[0]; 7131 } 7132 } else { 7133 PetscInt idx = 0; 7134 PetscMPIInt tag; 7135 MPI_Request *reqs; 7136 7137 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7138 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7139 for (i=rstart;i<rend;i++) { 7140 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7141 } 7142 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7143 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7144 ierr = PetscFree(reqs);CHKERRQ(ierr); 7145 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7146 #if defined(PETSC_USE_DEBUG) 7147 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7148 #endif 7149 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7150 } else if (oldranks) { 7151 ranks_send_to_idx[0] = oldranks[idx]; 7152 } else { 7153 ranks_send_to_idx[0] = idx; 7154 } 7155 } 7156 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7157 /* clean up */ 7158 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7159 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7160 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7161 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7162 } 7163 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7164 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7165 7166 /* assemble parallel IS for sends */ 7167 i = 1; 7168 if (!color) i=0; 7169 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7170 PetscFunctionReturn(0); 7171 } 7172 7173 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7174 7175 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7176 { 7177 Mat local_mat; 7178 IS is_sends_internal; 7179 PetscInt rows,cols,new_local_rows; 7180 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7181 PetscBool ismatis,isdense,newisdense,destroy_mat; 7182 ISLocalToGlobalMapping l2gmap; 7183 PetscInt* l2gmap_indices; 7184 const PetscInt* is_indices; 7185 MatType new_local_type; 7186 /* buffers */ 7187 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7188 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7189 PetscInt *recv_buffer_idxs_local; 7190 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7191 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7192 /* MPI */ 7193 MPI_Comm comm,comm_n; 7194 PetscSubcomm subcomm; 7195 PetscMPIInt n_sends,n_recvs,commsize; 7196 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7197 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7198 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7199 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7200 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7201 PetscErrorCode ierr; 7202 7203 PetscFunctionBegin; 7204 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7205 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7206 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7207 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7208 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7209 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7210 PetscValidLogicalCollectiveBool(mat,reuse,6); 7211 PetscValidLogicalCollectiveInt(mat,nis,8); 7212 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7213 if (nvecs) { 7214 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7215 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7216 } 7217 /* further checks */ 7218 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7219 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7220 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7221 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7222 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7223 if (reuse && *mat_n) { 7224 PetscInt mrows,mcols,mnrows,mncols; 7225 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7226 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7227 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7228 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7229 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7230 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7231 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7232 } 7233 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7234 PetscValidLogicalCollectiveInt(mat,bs,0); 7235 7236 /* prepare IS for sending if not provided */ 7237 if (!is_sends) { 7238 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7239 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7240 } else { 7241 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7242 is_sends_internal = is_sends; 7243 } 7244 7245 /* get comm */ 7246 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7247 7248 /* compute number of sends */ 7249 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7250 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7251 7252 /* compute number of receives */ 7253 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7254 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7255 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7256 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7257 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7258 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7259 ierr = PetscFree(iflags);CHKERRQ(ierr); 7260 7261 /* restrict comm if requested */ 7262 subcomm = 0; 7263 destroy_mat = PETSC_FALSE; 7264 if (restrict_comm) { 7265 PetscMPIInt color,subcommsize; 7266 7267 color = 0; 7268 if (restrict_full) { 7269 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7270 } else { 7271 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7272 } 7273 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7274 subcommsize = commsize - subcommsize; 7275 /* check if reuse has been requested */ 7276 if (reuse) { 7277 if (*mat_n) { 7278 PetscMPIInt subcommsize2; 7279 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7280 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7281 comm_n = PetscObjectComm((PetscObject)*mat_n); 7282 } else { 7283 comm_n = PETSC_COMM_SELF; 7284 } 7285 } else { /* MAT_INITIAL_MATRIX */ 7286 PetscMPIInt rank; 7287 7288 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7289 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7290 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7291 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7292 comm_n = PetscSubcommChild(subcomm); 7293 } 7294 /* flag to destroy *mat_n if not significative */ 7295 if (color) destroy_mat = PETSC_TRUE; 7296 } else { 7297 comm_n = comm; 7298 } 7299 7300 /* prepare send/receive buffers */ 7301 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7302 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7303 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7304 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7305 if (nis) { 7306 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7307 } 7308 7309 /* Get data from local matrices */ 7310 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7311 /* TODO: See below some guidelines on how to prepare the local buffers */ 7312 /* 7313 send_buffer_vals should contain the raw values of the local matrix 7314 send_buffer_idxs should contain: 7315 - MatType_PRIVATE type 7316 - PetscInt size_of_l2gmap 7317 - PetscInt global_row_indices[size_of_l2gmap] 7318 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7319 */ 7320 else { 7321 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7322 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7323 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7324 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7325 send_buffer_idxs[1] = i; 7326 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7327 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7328 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7329 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7330 for (i=0;i<n_sends;i++) { 7331 ilengths_vals[is_indices[i]] = len*len; 7332 ilengths_idxs[is_indices[i]] = len+2; 7333 } 7334 } 7335 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7336 /* additional is (if any) */ 7337 if (nis) { 7338 PetscMPIInt psum; 7339 PetscInt j; 7340 for (j=0,psum=0;j<nis;j++) { 7341 PetscInt plen; 7342 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7343 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7344 psum += len+1; /* indices + lenght */ 7345 } 7346 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7347 for (j=0,psum=0;j<nis;j++) { 7348 PetscInt plen; 7349 const PetscInt *is_array_idxs; 7350 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7351 send_buffer_idxs_is[psum] = plen; 7352 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7353 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7354 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7355 psum += plen+1; /* indices + lenght */ 7356 } 7357 for (i=0;i<n_sends;i++) { 7358 ilengths_idxs_is[is_indices[i]] = psum; 7359 } 7360 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7361 } 7362 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7363 7364 buf_size_idxs = 0; 7365 buf_size_vals = 0; 7366 buf_size_idxs_is = 0; 7367 buf_size_vecs = 0; 7368 for (i=0;i<n_recvs;i++) { 7369 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7370 buf_size_vals += (PetscInt)olengths_vals[i]; 7371 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7372 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7373 } 7374 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7375 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7376 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7377 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7378 7379 /* get new tags for clean communications */ 7380 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7381 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7382 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7383 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7384 7385 /* allocate for requests */ 7386 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7387 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7388 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7389 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7390 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7391 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7392 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7393 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7394 7395 /* communications */ 7396 ptr_idxs = recv_buffer_idxs; 7397 ptr_vals = recv_buffer_vals; 7398 ptr_idxs_is = recv_buffer_idxs_is; 7399 ptr_vecs = recv_buffer_vecs; 7400 for (i=0;i<n_recvs;i++) { 7401 source_dest = onodes[i]; 7402 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7403 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7404 ptr_idxs += olengths_idxs[i]; 7405 ptr_vals += olengths_vals[i]; 7406 if (nis) { 7407 source_dest = onodes_is[i]; 7408 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7409 ptr_idxs_is += olengths_idxs_is[i]; 7410 } 7411 if (nvecs) { 7412 source_dest = onodes[i]; 7413 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7414 ptr_vecs += olengths_idxs[i]-2; 7415 } 7416 } 7417 for (i=0;i<n_sends;i++) { 7418 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7419 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7420 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7421 if (nis) { 7422 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7423 } 7424 if (nvecs) { 7425 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7426 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7427 } 7428 } 7429 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7430 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7431 7432 /* assemble new l2g map */ 7433 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7434 ptr_idxs = recv_buffer_idxs; 7435 new_local_rows = 0; 7436 for (i=0;i<n_recvs;i++) { 7437 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7438 ptr_idxs += olengths_idxs[i]; 7439 } 7440 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7441 ptr_idxs = recv_buffer_idxs; 7442 new_local_rows = 0; 7443 for (i=0;i<n_recvs;i++) { 7444 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7445 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7446 ptr_idxs += olengths_idxs[i]; 7447 } 7448 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7449 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7450 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7451 7452 /* infer new local matrix type from received local matrices type */ 7453 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7454 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7455 if (n_recvs) { 7456 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7457 ptr_idxs = recv_buffer_idxs; 7458 for (i=0;i<n_recvs;i++) { 7459 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7460 new_local_type_private = MATAIJ_PRIVATE; 7461 break; 7462 } 7463 ptr_idxs += olengths_idxs[i]; 7464 } 7465 switch (new_local_type_private) { 7466 case MATDENSE_PRIVATE: 7467 new_local_type = MATSEQAIJ; 7468 bs = 1; 7469 break; 7470 case MATAIJ_PRIVATE: 7471 new_local_type = MATSEQAIJ; 7472 bs = 1; 7473 break; 7474 case MATBAIJ_PRIVATE: 7475 new_local_type = MATSEQBAIJ; 7476 break; 7477 case MATSBAIJ_PRIVATE: 7478 new_local_type = MATSEQSBAIJ; 7479 break; 7480 default: 7481 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7482 break; 7483 } 7484 } else { /* by default, new_local_type is seqaij */ 7485 new_local_type = MATSEQAIJ; 7486 bs = 1; 7487 } 7488 7489 /* create MATIS object if needed */ 7490 if (!reuse) { 7491 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7492 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7493 } else { 7494 /* it also destroys the local matrices */ 7495 if (*mat_n) { 7496 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7497 } else { /* this is a fake object */ 7498 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7499 } 7500 } 7501 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7502 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7503 7504 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7505 7506 /* Global to local map of received indices */ 7507 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7508 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7509 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7510 7511 /* restore attributes -> type of incoming data and its size */ 7512 buf_size_idxs = 0; 7513 for (i=0;i<n_recvs;i++) { 7514 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7515 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7516 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7517 } 7518 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7519 7520 /* set preallocation */ 7521 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7522 if (!newisdense) { 7523 PetscInt *new_local_nnz=0; 7524 7525 ptr_idxs = recv_buffer_idxs_local; 7526 if (n_recvs) { 7527 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7528 } 7529 for (i=0;i<n_recvs;i++) { 7530 PetscInt j; 7531 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7532 for (j=0;j<*(ptr_idxs+1);j++) { 7533 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7534 } 7535 } else { 7536 /* TODO */ 7537 } 7538 ptr_idxs += olengths_idxs[i]; 7539 } 7540 if (new_local_nnz) { 7541 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7542 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7543 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7544 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7545 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7546 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7547 } else { 7548 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7549 } 7550 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7551 } else { 7552 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7553 } 7554 7555 /* set values */ 7556 ptr_vals = recv_buffer_vals; 7557 ptr_idxs = recv_buffer_idxs_local; 7558 for (i=0;i<n_recvs;i++) { 7559 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7560 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7561 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7562 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7563 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7564 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7565 } else { 7566 /* TODO */ 7567 } 7568 ptr_idxs += olengths_idxs[i]; 7569 ptr_vals += olengths_vals[i]; 7570 } 7571 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7572 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7573 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7574 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7575 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7576 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7577 7578 #if 0 7579 if (!restrict_comm) { /* check */ 7580 Vec lvec,rvec; 7581 PetscReal infty_error; 7582 7583 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7584 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7585 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7586 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7587 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7588 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7589 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7590 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7591 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7592 } 7593 #endif 7594 7595 /* assemble new additional is (if any) */ 7596 if (nis) { 7597 PetscInt **temp_idxs,*count_is,j,psum; 7598 7599 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7600 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7601 ptr_idxs = recv_buffer_idxs_is; 7602 psum = 0; 7603 for (i=0;i<n_recvs;i++) { 7604 for (j=0;j<nis;j++) { 7605 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7606 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7607 psum += plen; 7608 ptr_idxs += plen+1; /* shift pointer to received data */ 7609 } 7610 } 7611 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7612 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7613 for (i=1;i<nis;i++) { 7614 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7615 } 7616 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7617 ptr_idxs = recv_buffer_idxs_is; 7618 for (i=0;i<n_recvs;i++) { 7619 for (j=0;j<nis;j++) { 7620 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7621 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7622 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7623 ptr_idxs += plen+1; /* shift pointer to received data */ 7624 } 7625 } 7626 for (i=0;i<nis;i++) { 7627 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7628 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7629 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7630 } 7631 ierr = PetscFree(count_is);CHKERRQ(ierr); 7632 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7633 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7634 } 7635 /* free workspace */ 7636 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7637 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7638 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7639 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7640 if (isdense) { 7641 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7642 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7643 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7644 } else { 7645 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7646 } 7647 if (nis) { 7648 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7649 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7650 } 7651 7652 if (nvecs) { 7653 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7654 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7655 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7656 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7657 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7658 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7659 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7660 /* set values */ 7661 ptr_vals = recv_buffer_vecs; 7662 ptr_idxs = recv_buffer_idxs_local; 7663 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7664 for (i=0;i<n_recvs;i++) { 7665 PetscInt j; 7666 for (j=0;j<*(ptr_idxs+1);j++) { 7667 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7668 } 7669 ptr_idxs += olengths_idxs[i]; 7670 ptr_vals += olengths_idxs[i]-2; 7671 } 7672 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7673 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7674 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7675 } 7676 7677 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7678 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7679 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7680 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7681 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7682 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7683 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7684 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7685 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7686 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7687 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7688 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7689 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7690 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7691 ierr = PetscFree(onodes);CHKERRQ(ierr); 7692 if (nis) { 7693 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7694 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7695 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7696 } 7697 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7698 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7699 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7700 for (i=0;i<nis;i++) { 7701 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7702 } 7703 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7704 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7705 } 7706 *mat_n = NULL; 7707 } 7708 PetscFunctionReturn(0); 7709 } 7710 7711 /* temporary hack into ksp private data structure */ 7712 #include <petsc/private/kspimpl.h> 7713 7714 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7715 { 7716 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7717 PC_IS *pcis = (PC_IS*)pc->data; 7718 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7719 Mat coarsedivudotp = NULL; 7720 Mat coarseG,t_coarse_mat_is; 7721 MatNullSpace CoarseNullSpace = NULL; 7722 ISLocalToGlobalMapping coarse_islg; 7723 IS coarse_is,*isarray; 7724 PetscInt i,im_active=-1,active_procs=-1; 7725 PetscInt nis,nisdofs,nisneu,nisvert; 7726 PC pc_temp; 7727 PCType coarse_pc_type; 7728 KSPType coarse_ksp_type; 7729 PetscBool multilevel_requested,multilevel_allowed; 7730 PetscBool coarse_reuse; 7731 PetscInt ncoarse,nedcfield; 7732 PetscBool compute_vecs = PETSC_FALSE; 7733 PetscScalar *array; 7734 MatReuse coarse_mat_reuse; 7735 PetscBool restr, full_restr, have_void; 7736 PetscMPIInt commsize; 7737 PetscErrorCode ierr; 7738 7739 PetscFunctionBegin; 7740 /* Assign global numbering to coarse dofs */ 7741 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7742 PetscInt ocoarse_size; 7743 compute_vecs = PETSC_TRUE; 7744 7745 pcbddc->new_primal_space = PETSC_TRUE; 7746 ocoarse_size = pcbddc->coarse_size; 7747 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7748 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7749 /* see if we can avoid some work */ 7750 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7751 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7752 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7753 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7754 coarse_reuse = PETSC_FALSE; 7755 } else { /* we can safely reuse already computed coarse matrix */ 7756 coarse_reuse = PETSC_TRUE; 7757 } 7758 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7759 coarse_reuse = PETSC_FALSE; 7760 } 7761 /* reset any subassembling information */ 7762 if (!coarse_reuse || pcbddc->recompute_topography) { 7763 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7764 } 7765 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7766 coarse_reuse = PETSC_TRUE; 7767 } 7768 /* assemble coarse matrix */ 7769 if (coarse_reuse && pcbddc->coarse_ksp) { 7770 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7771 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7772 coarse_mat_reuse = MAT_REUSE_MATRIX; 7773 } else { 7774 coarse_mat = NULL; 7775 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7776 } 7777 7778 /* creates temporary l2gmap and IS for coarse indexes */ 7779 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7780 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7781 7782 /* creates temporary MATIS object for coarse matrix */ 7783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7784 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7785 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7786 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7787 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7788 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7789 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7790 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7791 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7792 7793 /* count "active" (i.e. with positive local size) and "void" processes */ 7794 im_active = !!(pcis->n); 7795 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7796 7797 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7798 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7799 /* full_restr : just use the receivers from the subassembling pattern */ 7800 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7801 coarse_mat_is = NULL; 7802 multilevel_allowed = PETSC_FALSE; 7803 multilevel_requested = PETSC_FALSE; 7804 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7805 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7806 if (multilevel_requested) { 7807 ncoarse = active_procs/pcbddc->coarsening_ratio; 7808 restr = PETSC_FALSE; 7809 full_restr = PETSC_FALSE; 7810 } else { 7811 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7812 restr = PETSC_TRUE; 7813 full_restr = PETSC_TRUE; 7814 } 7815 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7816 ncoarse = PetscMax(1,ncoarse); 7817 if (!pcbddc->coarse_subassembling) { 7818 if (pcbddc->coarsening_ratio > 1) { 7819 if (multilevel_requested) { 7820 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7821 } else { 7822 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7823 } 7824 } else { 7825 PetscMPIInt rank; 7826 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7827 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7828 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7829 } 7830 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7831 PetscInt psum; 7832 if (pcbddc->coarse_ksp) psum = 1; 7833 else psum = 0; 7834 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7835 if (ncoarse < commsize) have_void = PETSC_TRUE; 7836 } 7837 /* determine if we can go multilevel */ 7838 if (multilevel_requested) { 7839 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7840 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7841 } 7842 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7843 7844 /* dump subassembling pattern */ 7845 if (pcbddc->dbg_flag && multilevel_allowed) { 7846 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7847 } 7848 7849 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7850 nedcfield = -1; 7851 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7852 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7853 const PetscInt *idxs; 7854 ISLocalToGlobalMapping tmap; 7855 7856 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7857 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7858 /* allocate space for temporary storage */ 7859 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7860 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7861 /* allocate for IS array */ 7862 nisdofs = pcbddc->n_ISForDofsLocal; 7863 if (pcbddc->nedclocal) { 7864 if (pcbddc->nedfield > -1) { 7865 nedcfield = pcbddc->nedfield; 7866 } else { 7867 nedcfield = 0; 7868 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7869 nisdofs = 1; 7870 } 7871 } 7872 nisneu = !!pcbddc->NeumannBoundariesLocal; 7873 nisvert = 0; /* nisvert is not used */ 7874 nis = nisdofs + nisneu + nisvert; 7875 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7876 /* dofs splitting */ 7877 for (i=0;i<nisdofs;i++) { 7878 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7879 if (nedcfield != i) { 7880 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7881 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7882 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7883 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7884 } else { 7885 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7886 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7887 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7888 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7889 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7890 } 7891 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7892 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7893 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7894 } 7895 /* neumann boundaries */ 7896 if (pcbddc->NeumannBoundariesLocal) { 7897 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7898 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7899 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7900 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7901 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7902 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7903 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7904 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7905 } 7906 /* free memory */ 7907 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7908 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7909 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7910 } else { 7911 nis = 0; 7912 nisdofs = 0; 7913 nisneu = 0; 7914 nisvert = 0; 7915 isarray = NULL; 7916 } 7917 /* destroy no longer needed map */ 7918 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7919 7920 /* subassemble */ 7921 if (multilevel_allowed) { 7922 Vec vp[1]; 7923 PetscInt nvecs = 0; 7924 PetscBool reuse,reuser; 7925 7926 if (coarse_mat) reuse = PETSC_TRUE; 7927 else reuse = PETSC_FALSE; 7928 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7929 vp[0] = NULL; 7930 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7931 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7932 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7933 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7934 nvecs = 1; 7935 7936 if (pcbddc->divudotp) { 7937 Mat B,loc_divudotp; 7938 Vec v,p; 7939 IS dummy; 7940 PetscInt np; 7941 7942 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7943 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7944 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7945 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7946 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7947 ierr = VecSet(p,1.);CHKERRQ(ierr); 7948 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7949 ierr = VecDestroy(&p);CHKERRQ(ierr); 7950 ierr = MatDestroy(&B);CHKERRQ(ierr); 7951 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7952 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7953 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7954 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7955 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7956 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7957 ierr = VecDestroy(&v);CHKERRQ(ierr); 7958 } 7959 } 7960 if (reuser) { 7961 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7962 } else { 7963 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7964 } 7965 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7966 PetscScalar *arraym,*arrayv; 7967 PetscInt nl; 7968 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7969 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7970 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7971 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7972 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7973 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7974 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7975 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7976 } else { 7977 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7978 } 7979 } else { 7980 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7981 } 7982 if (coarse_mat_is || coarse_mat) { 7983 PetscMPIInt size; 7984 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7985 if (!multilevel_allowed) { 7986 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7987 } else { 7988 Mat A; 7989 7990 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7991 if (coarse_mat_is) { 7992 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7993 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7994 coarse_mat = coarse_mat_is; 7995 } 7996 /* be sure we don't have MatSeqDENSE as local mat */ 7997 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7998 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7999 } 8000 } 8001 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8002 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8003 8004 /* create local to global scatters for coarse problem */ 8005 if (compute_vecs) { 8006 PetscInt lrows; 8007 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8008 if (coarse_mat) { 8009 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8010 } else { 8011 lrows = 0; 8012 } 8013 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8014 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8015 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8016 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8017 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8018 } 8019 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8020 8021 /* set defaults for coarse KSP and PC */ 8022 if (multilevel_allowed) { 8023 coarse_ksp_type = KSPRICHARDSON; 8024 coarse_pc_type = PCBDDC; 8025 } else { 8026 coarse_ksp_type = KSPPREONLY; 8027 coarse_pc_type = PCREDUNDANT; 8028 } 8029 8030 /* print some info if requested */ 8031 if (pcbddc->dbg_flag) { 8032 if (!multilevel_allowed) { 8033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8034 if (multilevel_requested) { 8035 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8036 } else if (pcbddc->max_levels) { 8037 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8038 } 8039 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8040 } 8041 } 8042 8043 /* communicate coarse discrete gradient */ 8044 coarseG = NULL; 8045 if (pcbddc->nedcG && multilevel_allowed) { 8046 MPI_Comm ccomm; 8047 if (coarse_mat) { 8048 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8049 } else { 8050 ccomm = MPI_COMM_NULL; 8051 } 8052 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8053 } 8054 8055 /* create the coarse KSP object only once with defaults */ 8056 if (coarse_mat) { 8057 PetscBool isredundant,isnn,isbddc; 8058 PetscViewer dbg_viewer = NULL; 8059 8060 if (pcbddc->dbg_flag) { 8061 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8062 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8063 } 8064 if (!pcbddc->coarse_ksp) { 8065 char prefix[256],str_level[16]; 8066 size_t len; 8067 8068 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8069 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8070 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8071 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8072 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8073 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8074 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8075 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8076 /* TODO is this logic correct? should check for coarse_mat type */ 8077 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8078 /* prefix */ 8079 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8080 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8081 if (!pcbddc->current_level) { 8082 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 8083 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 8084 } else { 8085 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8086 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8087 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8088 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8089 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8090 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 8091 } 8092 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8093 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8094 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8095 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8096 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8097 /* allow user customization */ 8098 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8099 } 8100 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8101 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8102 if (nisdofs) { 8103 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8104 for (i=0;i<nisdofs;i++) { 8105 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8106 } 8107 } 8108 if (nisneu) { 8109 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8110 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8111 } 8112 if (nisvert) { 8113 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8114 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8115 } 8116 if (coarseG) { 8117 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8118 } 8119 8120 /* get some info after set from options */ 8121 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8122 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8123 if (isbddc && !multilevel_allowed) { 8124 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8125 isbddc = PETSC_FALSE; 8126 } 8127 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8128 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8129 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8130 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8131 isbddc = PETSC_TRUE; 8132 } 8133 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8134 if (isredundant) { 8135 KSP inner_ksp; 8136 PC inner_pc; 8137 8138 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8139 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8140 } 8141 8142 /* parameters which miss an API */ 8143 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8144 if (isbddc) { 8145 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8146 8147 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8148 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8149 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8150 if (pcbddc_coarse->benign_saddle_point) { 8151 Mat coarsedivudotp_is; 8152 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8153 IS row,col; 8154 const PetscInt *gidxs; 8155 PetscInt n,st,M,N; 8156 8157 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8158 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8159 st = st-n; 8160 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8161 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8162 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8163 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8164 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8165 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8166 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8167 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8168 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8169 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8170 ierr = ISDestroy(&row);CHKERRQ(ierr); 8171 ierr = ISDestroy(&col);CHKERRQ(ierr); 8172 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8173 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8174 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8175 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8176 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8177 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8178 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8179 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8180 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8181 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8182 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8183 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8184 } 8185 } 8186 8187 /* propagate symmetry info of coarse matrix */ 8188 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8189 if (pc->pmat->symmetric_set) { 8190 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8191 } 8192 if (pc->pmat->hermitian_set) { 8193 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8194 } 8195 if (pc->pmat->spd_set) { 8196 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8197 } 8198 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8199 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8200 } 8201 /* set operators */ 8202 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8203 if (pcbddc->dbg_flag) { 8204 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8205 } 8206 } 8207 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8208 ierr = PetscFree(isarray);CHKERRQ(ierr); 8209 #if 0 8210 { 8211 PetscViewer viewer; 8212 char filename[256]; 8213 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8214 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8215 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8216 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8217 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8218 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8219 } 8220 #endif 8221 8222 if (pcbddc->coarse_ksp) { 8223 Vec crhs,csol; 8224 8225 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8226 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8227 if (!csol) { 8228 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8229 } 8230 if (!crhs) { 8231 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8232 } 8233 } 8234 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8235 8236 /* compute null space for coarse solver if the benign trick has been requested */ 8237 if (pcbddc->benign_null) { 8238 8239 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8240 for (i=0;i<pcbddc->benign_n;i++) { 8241 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8242 } 8243 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8244 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8245 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8246 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8247 if (coarse_mat) { 8248 Vec nullv; 8249 PetscScalar *array,*array2; 8250 PetscInt nl; 8251 8252 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8253 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8254 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8255 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8256 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8257 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8258 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8259 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8260 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8261 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8262 } 8263 } 8264 8265 if (pcbddc->coarse_ksp) { 8266 PetscBool ispreonly; 8267 8268 if (CoarseNullSpace) { 8269 PetscBool isnull; 8270 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8271 if (isnull) { 8272 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8273 } 8274 /* TODO: add local nullspaces (if any) */ 8275 } 8276 /* setup coarse ksp */ 8277 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8278 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8279 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8280 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8281 KSP check_ksp; 8282 KSPType check_ksp_type; 8283 PC check_pc; 8284 Vec check_vec,coarse_vec; 8285 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8286 PetscInt its; 8287 PetscBool compute_eigs; 8288 PetscReal *eigs_r,*eigs_c; 8289 PetscInt neigs; 8290 const char *prefix; 8291 8292 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8293 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8294 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8295 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8296 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8297 /* prevent from setup unneeded object */ 8298 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8299 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8300 if (ispreonly) { 8301 check_ksp_type = KSPPREONLY; 8302 compute_eigs = PETSC_FALSE; 8303 } else { 8304 check_ksp_type = KSPGMRES; 8305 compute_eigs = PETSC_TRUE; 8306 } 8307 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8308 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8309 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8310 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8311 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8312 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8313 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8314 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8315 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8316 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8317 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8318 /* create random vec */ 8319 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8320 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8321 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8322 /* solve coarse problem */ 8323 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8324 /* set eigenvalue estimation if preonly has not been requested */ 8325 if (compute_eigs) { 8326 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8327 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8328 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8329 if (neigs) { 8330 lambda_max = eigs_r[neigs-1]; 8331 lambda_min = eigs_r[0]; 8332 if (pcbddc->use_coarse_estimates) { 8333 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8334 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8335 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8336 } 8337 } 8338 } 8339 } 8340 8341 /* check coarse problem residual error */ 8342 if (pcbddc->dbg_flag) { 8343 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8344 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8345 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8346 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8347 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8348 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8349 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8350 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8351 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8352 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8353 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8354 if (CoarseNullSpace) { 8355 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8356 } 8357 if (compute_eigs) { 8358 PetscReal lambda_max_s,lambda_min_s; 8359 KSPConvergedReason reason; 8360 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8361 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8362 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8363 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8364 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8365 for (i=0;i<neigs;i++) { 8366 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8367 } 8368 } 8369 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8370 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8371 } 8372 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8373 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8374 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8375 if (compute_eigs) { 8376 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8377 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8378 } 8379 } 8380 } 8381 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8382 /* print additional info */ 8383 if (pcbddc->dbg_flag) { 8384 /* waits until all processes reaches this point */ 8385 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8387 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8388 } 8389 8390 /* free memory */ 8391 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8392 PetscFunctionReturn(0); 8393 } 8394 8395 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8396 { 8397 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8398 PC_IS* pcis = (PC_IS*)pc->data; 8399 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8400 IS subset,subset_mult,subset_n; 8401 PetscInt local_size,coarse_size=0; 8402 PetscInt *local_primal_indices=NULL; 8403 const PetscInt *t_local_primal_indices; 8404 PetscErrorCode ierr; 8405 8406 PetscFunctionBegin; 8407 /* Compute global number of coarse dofs */ 8408 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8409 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8410 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8411 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8412 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8413 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8414 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8415 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8416 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8417 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8418 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8419 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8420 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8421 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8422 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8423 8424 /* check numbering */ 8425 if (pcbddc->dbg_flag) { 8426 PetscScalar coarsesum,*array,*array2; 8427 PetscInt i; 8428 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8429 8430 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8431 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8432 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8433 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8434 /* counter */ 8435 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8436 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8437 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8438 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8439 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8440 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8441 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8442 for (i=0;i<pcbddc->local_primal_size;i++) { 8443 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8444 } 8445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8447 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8448 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8449 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8450 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8451 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8452 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8453 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8454 for (i=0;i<pcis->n;i++) { 8455 if (array[i] != 0.0 && array[i] != array2[i]) { 8456 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8457 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8458 set_error = PETSC_TRUE; 8459 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8461 } 8462 } 8463 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8464 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8465 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8466 for (i=0;i<pcis->n;i++) { 8467 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8468 } 8469 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8470 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8471 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8472 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8473 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8474 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8475 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8476 PetscInt *gidxs; 8477 8478 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8479 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8480 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8481 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8482 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8483 for (i=0;i<pcbddc->local_primal_size;i++) { 8484 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8485 } 8486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8487 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8488 } 8489 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8490 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8491 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8492 } 8493 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8494 /* get back data */ 8495 *coarse_size_n = coarse_size; 8496 *local_primal_indices_n = local_primal_indices; 8497 PetscFunctionReturn(0); 8498 } 8499 8500 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8501 { 8502 IS localis_t; 8503 PetscInt i,lsize,*idxs,n; 8504 PetscScalar *vals; 8505 PetscErrorCode ierr; 8506 8507 PetscFunctionBegin; 8508 /* get indices in local ordering exploiting local to global map */ 8509 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8510 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8511 for (i=0;i<lsize;i++) vals[i] = 1.0; 8512 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8513 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8514 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8515 if (idxs) { /* multilevel guard */ 8516 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8517 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8518 } 8519 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8520 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8521 ierr = PetscFree(vals);CHKERRQ(ierr); 8522 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8523 /* now compute set in local ordering */ 8524 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8525 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8526 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8527 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8528 for (i=0,lsize=0;i<n;i++) { 8529 if (PetscRealPart(vals[i]) > 0.5) { 8530 lsize++; 8531 } 8532 } 8533 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8534 for (i=0,lsize=0;i<n;i++) { 8535 if (PetscRealPart(vals[i]) > 0.5) { 8536 idxs[lsize++] = i; 8537 } 8538 } 8539 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8540 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8541 *localis = localis_t; 8542 PetscFunctionReturn(0); 8543 } 8544 8545 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8546 { 8547 PC_IS *pcis=(PC_IS*)pc->data; 8548 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8549 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8550 Mat S_j; 8551 PetscInt *used_xadj,*used_adjncy; 8552 PetscBool free_used_adj; 8553 PetscErrorCode ierr; 8554 8555 PetscFunctionBegin; 8556 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8557 free_used_adj = PETSC_FALSE; 8558 if (pcbddc->sub_schurs_layers == -1) { 8559 used_xadj = NULL; 8560 used_adjncy = NULL; 8561 } else { 8562 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8563 used_xadj = pcbddc->mat_graph->xadj; 8564 used_adjncy = pcbddc->mat_graph->adjncy; 8565 } else if (pcbddc->computed_rowadj) { 8566 used_xadj = pcbddc->mat_graph->xadj; 8567 used_adjncy = pcbddc->mat_graph->adjncy; 8568 } else { 8569 PetscBool flg_row=PETSC_FALSE; 8570 const PetscInt *xadj,*adjncy; 8571 PetscInt nvtxs; 8572 8573 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8574 if (flg_row) { 8575 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8576 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8577 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8578 free_used_adj = PETSC_TRUE; 8579 } else { 8580 pcbddc->sub_schurs_layers = -1; 8581 used_xadj = NULL; 8582 used_adjncy = NULL; 8583 } 8584 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8585 } 8586 } 8587 8588 /* setup sub_schurs data */ 8589 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8590 if (!sub_schurs->schur_explicit) { 8591 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8592 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8593 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8594 } else { 8595 Mat change = NULL; 8596 Vec scaling = NULL; 8597 IS change_primal = NULL, iP; 8598 PetscInt benign_n; 8599 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8600 PetscBool isseqaij,need_change = PETSC_FALSE; 8601 PetscBool discrete_harmonic = PETSC_FALSE; 8602 8603 if (!pcbddc->use_vertices && reuse_solvers) { 8604 PetscInt n_vertices; 8605 8606 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8607 reuse_solvers = (PetscBool)!n_vertices; 8608 } 8609 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8610 if (!isseqaij) { 8611 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8612 if (matis->A == pcbddc->local_mat) { 8613 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8614 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8615 } else { 8616 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8617 } 8618 } 8619 if (!pcbddc->benign_change_explicit) { 8620 benign_n = pcbddc->benign_n; 8621 } else { 8622 benign_n = 0; 8623 } 8624 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8625 We need a global reduction to avoid possible deadlocks. 8626 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8627 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8628 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8629 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8630 need_change = (PetscBool)(!need_change); 8631 } 8632 /* If the user defines additional constraints, we import them here. 8633 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8634 if (need_change) { 8635 PC_IS *pcisf; 8636 PC_BDDC *pcbddcf; 8637 PC pcf; 8638 8639 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8640 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8641 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8642 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8643 8644 /* hacks */ 8645 pcisf = (PC_IS*)pcf->data; 8646 pcisf->is_B_local = pcis->is_B_local; 8647 pcisf->vec1_N = pcis->vec1_N; 8648 pcisf->BtoNmap = pcis->BtoNmap; 8649 pcisf->n = pcis->n; 8650 pcisf->n_B = pcis->n_B; 8651 pcbddcf = (PC_BDDC*)pcf->data; 8652 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8653 pcbddcf->mat_graph = pcbddc->mat_graph; 8654 pcbddcf->use_faces = PETSC_TRUE; 8655 pcbddcf->use_change_of_basis = PETSC_TRUE; 8656 pcbddcf->use_change_on_faces = PETSC_TRUE; 8657 pcbddcf->use_qr_single = PETSC_TRUE; 8658 pcbddcf->fake_change = PETSC_TRUE; 8659 8660 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8661 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8662 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8663 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8664 change = pcbddcf->ConstraintMatrix; 8665 pcbddcf->ConstraintMatrix = NULL; 8666 8667 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8668 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8669 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8670 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8671 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8672 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8673 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8674 pcf->ops->destroy = NULL; 8675 pcf->ops->reset = NULL; 8676 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8677 } 8678 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8679 8680 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8681 if (iP) { 8682 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8683 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8684 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8685 } 8686 if (discrete_harmonic) { 8687 Mat A; 8688 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8689 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8690 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8691 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8692 ierr = MatDestroy(&A);CHKERRQ(ierr); 8693 } else { 8694 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8695 } 8696 ierr = MatDestroy(&change);CHKERRQ(ierr); 8697 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8698 } 8699 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8700 8701 /* free adjacency */ 8702 if (free_used_adj) { 8703 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8704 } 8705 PetscFunctionReturn(0); 8706 } 8707 8708 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8709 { 8710 PC_IS *pcis=(PC_IS*)pc->data; 8711 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8712 PCBDDCGraph graph; 8713 PetscErrorCode ierr; 8714 8715 PetscFunctionBegin; 8716 /* attach interface graph for determining subsets */ 8717 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8718 IS verticesIS,verticescomm; 8719 PetscInt vsize,*idxs; 8720 8721 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8722 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8723 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8724 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8725 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8726 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8727 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8728 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8729 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8730 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8731 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8732 } else { 8733 graph = pcbddc->mat_graph; 8734 } 8735 /* print some info */ 8736 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8737 IS vertices; 8738 PetscInt nv,nedges,nfaces; 8739 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8740 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8741 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8742 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8743 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8744 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8745 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8746 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8747 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8748 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8749 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8750 } 8751 8752 /* sub_schurs init */ 8753 if (!pcbddc->sub_schurs) { 8754 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8755 } 8756 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8757 8758 /* free graph struct */ 8759 if (pcbddc->sub_schurs_rebuild) { 8760 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8761 } 8762 PetscFunctionReturn(0); 8763 } 8764 8765 PetscErrorCode PCBDDCCheckOperator(PC pc) 8766 { 8767 PC_IS *pcis=(PC_IS*)pc->data; 8768 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8769 PetscErrorCode ierr; 8770 8771 PetscFunctionBegin; 8772 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8773 IS zerodiag = NULL; 8774 Mat S_j,B0_B=NULL; 8775 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8776 PetscScalar *p0_check,*array,*array2; 8777 PetscReal norm; 8778 PetscInt i; 8779 8780 /* B0 and B0_B */ 8781 if (zerodiag) { 8782 IS dummy; 8783 8784 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8785 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8786 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8787 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8788 } 8789 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8790 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8791 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8792 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8793 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8794 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8795 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8796 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8797 /* S_j */ 8798 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8799 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8800 8801 /* mimic vector in \widetilde{W}_\Gamma */ 8802 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8803 /* continuous in primal space */ 8804 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8805 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8806 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8807 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8808 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8809 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8810 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8811 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8812 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8813 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8814 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8815 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8816 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8817 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8818 8819 /* assemble rhs for coarse problem */ 8820 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8821 /* local with Schur */ 8822 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8823 if (zerodiag) { 8824 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8825 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8826 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8827 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8828 } 8829 /* sum on primal nodes the local contributions */ 8830 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8831 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8832 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8833 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8834 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8835 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8836 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8837 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8838 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8839 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8840 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8841 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8842 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8843 /* scale primal nodes (BDDC sums contibutions) */ 8844 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8845 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8846 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8847 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8848 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8849 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8850 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8851 /* global: \widetilde{B0}_B w_\Gamma */ 8852 if (zerodiag) { 8853 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8854 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8855 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8856 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8857 } 8858 /* BDDC */ 8859 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8860 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8861 8862 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8863 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8864 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8865 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8866 for (i=0;i<pcbddc->benign_n;i++) { 8867 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8868 } 8869 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8870 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8871 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8872 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8873 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8874 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8875 } 8876 PetscFunctionReturn(0); 8877 } 8878 8879 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8880 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8881 { 8882 Mat At; 8883 IS rows; 8884 PetscInt rst,ren; 8885 PetscErrorCode ierr; 8886 PetscLayout rmap; 8887 8888 PetscFunctionBegin; 8889 rst = ren = 0; 8890 if (ccomm != MPI_COMM_NULL) { 8891 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8892 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8893 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8894 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8895 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8896 } 8897 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8898 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8899 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8900 8901 if (ccomm != MPI_COMM_NULL) { 8902 Mat_MPIAIJ *a,*b; 8903 IS from,to; 8904 Vec gvec; 8905 PetscInt lsize; 8906 8907 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8908 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8909 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8910 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8911 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8912 a = (Mat_MPIAIJ*)At->data; 8913 b = (Mat_MPIAIJ*)(*B)->data; 8914 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8915 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8916 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8917 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8918 b->A = a->A; 8919 b->B = a->B; 8920 8921 b->donotstash = a->donotstash; 8922 b->roworiented = a->roworiented; 8923 b->rowindices = 0; 8924 b->rowvalues = 0; 8925 b->getrowactive = PETSC_FALSE; 8926 8927 (*B)->rmap = rmap; 8928 (*B)->factortype = A->factortype; 8929 (*B)->assembled = PETSC_TRUE; 8930 (*B)->insertmode = NOT_SET_VALUES; 8931 (*B)->preallocated = PETSC_TRUE; 8932 8933 if (a->colmap) { 8934 #if defined(PETSC_USE_CTABLE) 8935 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8936 #else 8937 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8938 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8939 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8940 #endif 8941 } else b->colmap = 0; 8942 if (a->garray) { 8943 PetscInt len; 8944 len = a->B->cmap->n; 8945 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8946 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8947 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8948 } else b->garray = 0; 8949 8950 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8951 b->lvec = a->lvec; 8952 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8953 8954 /* cannot use VecScatterCopy */ 8955 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8956 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8957 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8958 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8959 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8960 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8961 ierr = ISDestroy(&from);CHKERRQ(ierr); 8962 ierr = ISDestroy(&to);CHKERRQ(ierr); 8963 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8964 } 8965 ierr = MatDestroy(&At);CHKERRQ(ierr); 8966 PetscFunctionReturn(0); 8967 } 8968