1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 458 459 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 460 for proper detection of coarse edges' endpoints */ 461 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 462 for (i=0;i<ne;i++) { 463 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 464 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 465 } 466 } 467 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 468 if (!conforming) { 469 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 470 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 471 } 472 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 473 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 474 cum = 0; 475 for (i=0;i<ne;i++) { 476 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 477 if (!PetscBTLookup(btee,i)) { 478 marks[cum++] = i; 479 continue; 480 } 481 /* set badly connected edge dofs as primal */ 482 if (!conforming) { 483 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 484 marks[cum++] = i; 485 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 486 for (j=ii[i];j<ii[i+1];j++) { 487 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 488 } 489 } else { 490 /* every edge dofs should be connected trough a certain number of nodal dofs 491 to other edge dofs belonging to coarse edges 492 - at most 2 endpoints 493 - order-1 interior nodal dofs 494 - no undefined nodal dofs (nconn < order) 495 */ 496 PetscInt ends = 0,ints = 0, undef = 0; 497 for (j=ii[i];j<ii[i+1];j++) { 498 PetscInt v = jj[j],k; 499 PetscInt nconn = iit[v+1]-iit[v]; 500 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 501 if (nconn > order) ends++; 502 else if (nconn == order) ints++; 503 else undef++; 504 } 505 if (undef || ends > 2 || ints != order -1) { 506 marks[cum++] = i; 507 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 508 for (j=ii[i];j<ii[i+1];j++) { 509 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 510 } 511 } 512 } 513 } 514 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 515 if (!order && ii[i+1] != ii[i]) { 516 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 517 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 518 } 519 } 520 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 521 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 522 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 523 if (!conforming) { 524 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 525 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 526 } 527 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 528 529 /* identify splitpoints and corner candidates */ 530 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 531 if (print) { 532 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 533 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 534 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 535 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 536 } 537 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 538 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 539 for (i=0;i<nv;i++) { 540 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 541 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 542 if (!order) { /* variable order */ 543 PetscReal vorder = 0.; 544 545 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 546 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 547 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 548 ord = 1; 549 } 550 #if defined(PETSC_USE_DEBUG) 551 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 552 #endif 553 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 554 if (PetscBTLookup(btbd,jj[j])) { 555 bdir = PETSC_TRUE; 556 break; 557 } 558 if (vc != ecount[jj[j]]) { 559 sneighs = PETSC_FALSE; 560 } else { 561 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 562 for (k=0;k<vc;k++) { 563 if (vn[k] != en[k]) { 564 sneighs = PETSC_FALSE; 565 break; 566 } 567 } 568 } 569 } 570 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 571 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 572 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 573 } else if (test == ord) { 574 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 576 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 577 } else { 578 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 579 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 580 } 581 } 582 } 583 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 584 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 585 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 586 587 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 588 if (order != 1) { 589 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 590 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 591 for (i=0;i<nv;i++) { 592 if (PetscBTLookup(btvcand,i)) { 593 PetscBool found = PETSC_FALSE; 594 for (j=ii[i];j<ii[i+1] && !found;j++) { 595 PetscInt k,e = jj[j]; 596 if (PetscBTLookup(bte,e)) continue; 597 for (k=iit[e];k<iit[e+1];k++) { 598 PetscInt v = jjt[k]; 599 if (v != i && PetscBTLookup(btvcand,v)) { 600 found = PETSC_TRUE; 601 break; 602 } 603 } 604 } 605 if (!found) { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 607 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 608 } else { 609 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 610 } 611 } 612 } 613 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 614 } 615 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 616 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 617 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 618 619 /* Get the local G^T explicitly */ 620 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 621 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 622 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 623 624 /* Mark interior nodal dofs */ 625 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 626 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 627 for (i=1;i<n_neigh;i++) { 628 for (j=0;j<n_shared[i];j++) { 629 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 630 } 631 } 632 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 633 634 /* communicate corners and splitpoints */ 635 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 636 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 638 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 639 640 if (print) { 641 IS tbz; 642 643 cum = 0; 644 for (i=0;i<nv;i++) 645 if (sfvleaves[i]) 646 vmarks[cum++] = i; 647 648 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 649 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 650 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 651 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 652 } 653 654 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 655 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 656 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 657 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 658 659 /* Zero rows of lGt corresponding to identified corners 660 and interior nodal dofs */ 661 cum = 0; 662 for (i=0;i<nv;i++) { 663 if (sfvleaves[i]) { 664 vmarks[cum++] = i; 665 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 666 } 667 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 668 } 669 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 670 if (print) { 671 IS tbz; 672 673 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 674 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 675 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 676 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 677 } 678 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 679 ierr = PetscFree(vmarks);CHKERRQ(ierr); 680 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 681 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 682 683 /* Recompute G */ 684 ierr = MatDestroy(&lG);CHKERRQ(ierr); 685 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 686 if (print) { 687 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 688 ierr = MatView(lG,NULL);CHKERRQ(ierr); 689 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 690 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 691 } 692 693 /* Get primal dofs (if any) */ 694 cum = 0; 695 for (i=0;i<ne;i++) { 696 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 697 } 698 if (fl2g) { 699 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 700 } 701 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 702 if (print) { 703 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 704 ierr = ISView(primals,NULL);CHKERRQ(ierr); 705 } 706 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 707 /* TODO: what if the user passed in some of them ? */ 708 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 709 ierr = ISDestroy(&primals);CHKERRQ(ierr); 710 711 /* Compute edge connectivity */ 712 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 713 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 714 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 715 if (fl2g) { 716 PetscBT btf; 717 PetscInt *iia,*jja,*iiu,*jju; 718 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 719 720 /* create CSR for all local dofs */ 721 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 722 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 723 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 724 iiu = pcbddc->mat_graph->xadj; 725 jju = pcbddc->mat_graph->adjncy; 726 } else if (pcbddc->use_local_adj) { 727 rest = PETSC_TRUE; 728 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 729 } else { 730 free = PETSC_TRUE; 731 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 732 iiu[0] = 0; 733 for (i=0;i<n;i++) { 734 iiu[i+1] = i+1; 735 jju[i] = -1; 736 } 737 } 738 739 /* import sizes of CSR */ 740 iia[0] = 0; 741 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 742 743 /* overwrite entries corresponding to the Nedelec field */ 744 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 745 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 746 for (i=0;i<ne;i++) { 747 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 748 iia[idxs[i]+1] = ii[i+1]-ii[i]; 749 } 750 751 /* iia in CSR */ 752 for (i=0;i<n;i++) iia[i+1] += iia[i]; 753 754 /* jja in CSR */ 755 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 756 for (i=0;i<n;i++) 757 if (!PetscBTLookup(btf,i)) 758 for (j=0;j<iiu[i+1]-iiu[i];j++) 759 jja[iia[i]+j] = jju[iiu[i]+j]; 760 761 /* map edge dofs connectivity */ 762 if (jj) { 763 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 764 for (i=0;i<ne;i++) { 765 PetscInt e = idxs[i]; 766 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 767 } 768 } 769 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 770 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 771 if (rest) { 772 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 773 } 774 if (free) { 775 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 776 } 777 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 778 } else { 779 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 780 } 781 782 /* Analyze interface for edge dofs */ 783 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 784 pcbddc->mat_graph->twodim = PETSC_FALSE; 785 786 /* Get coarse edges in the edge space */ 787 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 788 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 789 790 if (fl2g) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 792 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 793 for (i=0;i<nee;i++) { 794 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 795 } 796 } else { 797 eedges = alleedges; 798 primals = allprimals; 799 } 800 801 /* Mark fine edge dofs with their coarse edge id */ 802 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 803 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 804 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 805 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 806 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 807 if (print) { 808 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 809 ierr = ISView(primals,NULL);CHKERRQ(ierr); 810 } 811 812 maxsize = 0; 813 for (i=0;i<nee;i++) { 814 PetscInt size,mark = i+1; 815 816 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 817 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 818 for (j=0;j<size;j++) marks[idxs[j]] = mark; 819 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 820 maxsize = PetscMax(maxsize,size); 821 } 822 823 /* Find coarse edge endpoints */ 824 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 825 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 826 for (i=0;i<nee;i++) { 827 PetscInt mark = i+1,size; 828 829 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 830 if (!size && nedfieldlocal) continue; 831 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 832 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 833 if (print) { 834 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 835 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 836 } 837 for (j=0;j<size;j++) { 838 PetscInt k, ee = idxs[j]; 839 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 840 for (k=ii[ee];k<ii[ee+1];k++) { 841 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 842 if (PetscBTLookup(btv,jj[k])) { 843 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 844 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 845 PetscInt k2; 846 PetscBool corner = PETSC_FALSE; 847 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 848 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 849 /* it's a corner if either is connected with an edge dof belonging to a different cc or 850 if the edge dof lie on the natural part of the boundary */ 851 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 852 corner = PETSC_TRUE; 853 break; 854 } 855 } 856 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 857 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 858 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 859 } else { 860 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 861 } 862 } 863 } 864 } 865 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 866 } 867 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 868 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 869 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 870 871 /* Reset marked primal dofs */ 872 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 873 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 874 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 875 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 876 877 /* Now use the initial lG */ 878 ierr = MatDestroy(&lG);CHKERRQ(ierr); 879 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 880 lG = lGinit; 881 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 882 883 /* Compute extended cols indices */ 884 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 885 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 886 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 887 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 888 i *= maxsize; 889 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 890 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 891 eerr = PETSC_FALSE; 892 for (i=0;i<nee;i++) { 893 PetscInt size,found = 0; 894 895 cum = 0; 896 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 897 if (!size && nedfieldlocal) continue; 898 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 899 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 901 for (j=0;j<size;j++) { 902 PetscInt k,ee = idxs[j]; 903 for (k=ii[ee];k<ii[ee+1];k++) { 904 PetscInt vv = jj[k]; 905 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 906 else if (!PetscBTLookupSet(btvc,vv)) found++; 907 } 908 } 909 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 910 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 911 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 912 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 914 /* it may happen that endpoints are not defined at this point 915 if it is the case, mark this edge for a second pass */ 916 if (cum != size -1 || found != 2) { 917 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 918 if (print) { 919 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 920 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 921 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 922 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 923 } 924 eerr = PETSC_TRUE; 925 } 926 } 927 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 928 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 929 if (done) { 930 PetscInt *newprimals; 931 932 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 933 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 934 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 935 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 936 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 937 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 938 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 939 for (i=0;i<nee;i++) { 940 PetscBool has_candidates = PETSC_FALSE; 941 if (PetscBTLookup(bter,i)) { 942 PetscInt size,mark = i+1; 943 944 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 945 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 946 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 947 for (j=0;j<size;j++) { 948 PetscInt k,ee = idxs[j]; 949 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 950 for (k=ii[ee];k<ii[ee+1];k++) { 951 /* set all candidates located on the edge as corners */ 952 if (PetscBTLookup(btvcand,jj[k])) { 953 PetscInt k2,vv = jj[k]; 954 has_candidates = PETSC_TRUE; 955 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 956 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 957 /* set all edge dofs connected to candidate as primals */ 958 for (k2=iit[vv];k2<iit[vv+1];k2++) { 959 if (marks[jjt[k2]] == mark) { 960 PetscInt k3,ee2 = jjt[k2]; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 962 newprimals[cum++] = ee2; 963 /* finally set the new corners */ 964 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 966 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 967 } 968 } 969 } 970 } else { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 972 } 973 } 974 } 975 if (!has_candidates) { /* circular edge */ 976 PetscInt k, ee = idxs[0],*tmarks; 977 978 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 979 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 980 for (k=ii[ee];k<ii[ee+1];k++) { 981 PetscInt k2; 982 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 983 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 984 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 985 } 986 for (j=0;j<size;j++) { 987 if (tmarks[idxs[j]] > 1) { 988 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 989 newprimals[cum++] = idxs[j]; 990 } 991 } 992 ierr = PetscFree(tmarks);CHKERRQ(ierr); 993 } 994 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 995 } 996 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 997 } 998 ierr = PetscFree(extcols);CHKERRQ(ierr); 999 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1000 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1001 if (fl2g) { 1002 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1003 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1004 for (i=0;i<nee;i++) { 1005 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(eedges);CHKERRQ(ierr); 1008 } 1009 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1010 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1011 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1012 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1013 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1014 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1015 pcbddc->mat_graph->twodim = PETSC_FALSE; 1016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1017 if (fl2g) { 1018 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1020 for (i=0;i<nee;i++) { 1021 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1022 } 1023 } else { 1024 eedges = alleedges; 1025 primals = allprimals; 1026 } 1027 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1028 1029 /* Mark again */ 1030 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1031 for (i=0;i<nee;i++) { 1032 PetscInt size,mark = i+1; 1033 1034 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1035 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1036 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1037 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1038 } 1039 if (print) { 1040 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1041 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1042 } 1043 1044 /* Recompute extended cols */ 1045 eerr = PETSC_FALSE; 1046 for (i=0;i<nee;i++) { 1047 PetscInt size; 1048 1049 cum = 0; 1050 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1051 if (!size && nedfieldlocal) continue; 1052 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1053 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 for (j=0;j<size;j++) { 1055 PetscInt k,ee = idxs[j]; 1056 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1057 } 1058 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1060 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1061 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1063 if (cum != size -1) { 1064 if (print) { 1065 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1066 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1067 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1068 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1069 } 1070 eerr = PETSC_TRUE; 1071 } 1072 } 1073 } 1074 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1075 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1076 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1077 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1078 /* an error should not occur at this point */ 1079 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1080 1081 /* Check the number of endpoints */ 1082 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1083 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1084 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size, found = 0, gc[2]; 1087 1088 /* init with defaults */ 1089 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 if (!size && nedfieldlocal) continue; 1092 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1093 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) { 1096 PetscInt k,ee = idxs[j]; 1097 for (k=ii[ee];k<ii[ee+1];k++) { 1098 PetscInt vv = jj[k]; 1099 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1100 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1101 corners[i*2+found++] = vv; 1102 } 1103 } 1104 } 1105 if (found != 2) { 1106 PetscInt e; 1107 if (fl2g) { 1108 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1109 } else { 1110 e = idxs[0]; 1111 } 1112 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1113 } 1114 1115 /* get primal dof index on this coarse edge */ 1116 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1117 if (gc[0] > gc[1]) { 1118 PetscInt swap = corners[2*i]; 1119 corners[2*i] = corners[2*i+1]; 1120 corners[2*i+1] = swap; 1121 } 1122 cedges[i] = idxs[size-1]; 1123 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1124 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1125 } 1126 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1128 1129 #if defined(PETSC_USE_DEBUG) 1130 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1131 not interfere with neighbouring coarse edges */ 1132 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1133 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 for (i=0;i<nv;i++) { 1135 PetscInt emax = 0,eemax = 0; 1136 1137 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1138 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1139 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1140 for (j=1;j<nee+1;j++) { 1141 if (emax < emarks[j]) { 1142 emax = emarks[j]; 1143 eemax = j; 1144 } 1145 } 1146 /* not relevant for edges */ 1147 if (!eemax) continue; 1148 1149 for (j=ii[i];j<ii[i+1];j++) { 1150 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1151 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1152 } 1153 } 1154 } 1155 ierr = PetscFree(emarks);CHKERRQ(ierr); 1156 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1157 #endif 1158 1159 /* Compute extended rows indices for edge blocks of the change of basis */ 1160 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1161 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1162 extmem *= maxsize; 1163 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1165 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1166 for (i=0;i<nv;i++) { 1167 PetscInt mark = 0,size,start; 1168 1169 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1170 for (j=ii[i];j<ii[i+1];j++) 1171 if (marks[jj[j]] && !mark) 1172 mark = marks[jj[j]]; 1173 1174 /* not relevant */ 1175 if (!mark) continue; 1176 1177 /* import extended row */ 1178 mark--; 1179 start = mark*extmem+extrowcum[mark]; 1180 size = ii[i+1]-ii[i]; 1181 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1182 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1183 extrowcum[mark] += size; 1184 } 1185 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1187 ierr = PetscFree(marks);CHKERRQ(ierr); 1188 1189 /* Compress extrows */ 1190 cum = 0; 1191 for (i=0;i<nee;i++) { 1192 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1193 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1195 cum = PetscMax(cum,size); 1196 } 1197 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1198 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1199 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1200 1201 /* Workspace for lapack inner calls and VecSetValues */ 1202 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1203 1204 /* Create change of basis matrix (preallocation can be improved) */ 1205 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1206 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1207 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1208 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1209 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1210 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1211 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1212 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1213 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1214 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1215 1216 /* Defaults to identity */ 1217 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1218 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1219 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1221 1222 /* Create discrete gradient for the coarser level if needed */ 1223 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1224 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1225 if (pcbddc->current_level < pcbddc->max_levels) { 1226 ISLocalToGlobalMapping cel2g,cvl2g; 1227 IS wis,gwis; 1228 PetscInt cnv,cne; 1229 1230 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1231 if (fl2g) { 1232 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1235 pcbddc->nedclocal = wis; 1236 } 1237 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1238 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1239 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1240 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1241 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1242 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1243 1244 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1246 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1247 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1248 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1249 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1250 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1251 1252 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1253 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1254 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1255 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1256 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1257 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1258 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1259 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1260 } 1261 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1262 1263 #if defined(PRINT_GDET) 1264 inc = 0; 1265 lev = pcbddc->current_level; 1266 #endif 1267 1268 /* Insert values in the change of basis matrix */ 1269 for (i=0;i<nee;i++) { 1270 Mat Gins = NULL, GKins = NULL; 1271 IS cornersis = NULL; 1272 PetscScalar cvals[2]; 1273 1274 if (pcbddc->nedcG) { 1275 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1276 } 1277 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1278 if (Gins && GKins) { 1279 PetscScalar *data; 1280 const PetscInt *rows,*cols; 1281 PetscInt nrh,nch,nrc,ncc; 1282 1283 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1284 /* H1 */ 1285 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1286 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1287 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1288 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1289 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1291 /* complement */ 1292 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1293 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1294 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1295 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1296 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1297 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1299 1300 /* coarse discrete gradient */ 1301 if (pcbddc->nedcG) { 1302 PetscInt cols[2]; 1303 1304 cols[0] = 2*i; 1305 cols[1] = 2*i+1; 1306 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1307 } 1308 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1309 } 1310 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1311 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1312 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1313 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1314 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1317 1318 /* Start assembling */ 1319 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1320 if (pcbddc->nedcG) { 1321 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 } 1323 1324 /* Free */ 1325 if (fl2g) { 1326 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1327 for (i=0;i<nee;i++) { 1328 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1329 } 1330 ierr = PetscFree(eedges);CHKERRQ(ierr); 1331 } 1332 1333 /* hack mat_graph with primal dofs on the coarse edges */ 1334 { 1335 PCBDDCGraph graph = pcbddc->mat_graph; 1336 PetscInt *oqueue = graph->queue; 1337 PetscInt *ocptr = graph->cptr; 1338 PetscInt ncc,*idxs; 1339 1340 /* find first primal edge */ 1341 if (pcbddc->nedclocal) { 1342 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1343 } else { 1344 if (fl2g) { 1345 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1346 } 1347 idxs = cedges; 1348 } 1349 cum = 0; 1350 while (cum < nee && cedges[cum] < 0) cum++; 1351 1352 /* adapt connected components */ 1353 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1354 graph->cptr[0] = 0; 1355 for (i=0,ncc=0;i<graph->ncc;i++) { 1356 PetscInt lc = ocptr[i+1]-ocptr[i]; 1357 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1358 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1359 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1360 ncc++; 1361 lc--; 1362 cum++; 1363 while (cum < nee && cedges[cum] < 0) cum++; 1364 } 1365 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1366 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1367 ncc++; 1368 } 1369 graph->ncc = ncc; 1370 if (pcbddc->nedclocal) { 1371 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1372 } 1373 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1376 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1377 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1378 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1379 1380 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1381 ierr = PetscFree(extrow);CHKERRQ(ierr); 1382 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1383 ierr = PetscFree(corners);CHKERRQ(ierr); 1384 ierr = PetscFree(cedges);CHKERRQ(ierr); 1385 ierr = PetscFree(extrows);CHKERRQ(ierr); 1386 ierr = PetscFree(extcols);CHKERRQ(ierr); 1387 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1388 1389 /* Complete assembling */ 1390 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (pcbddc->nedcG) { 1392 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 #if 0 1394 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1395 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1396 #endif 1397 } 1398 1399 /* set change of basis */ 1400 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1401 ierr = MatDestroy(&T);CHKERRQ(ierr); 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* the near-null space of BDDC carries information on quadrature weights, 1407 and these can be collinear -> so cheat with MatNullSpaceCreate 1408 and create a suitable set of basis vectors first */ 1409 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1410 { 1411 PetscErrorCode ierr; 1412 PetscInt i; 1413 1414 PetscFunctionBegin; 1415 for (i=0;i<nvecs;i++) { 1416 PetscInt first,last; 1417 1418 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1419 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1420 if (i>=first && i < last) { 1421 PetscScalar *data; 1422 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1423 if (!has_const) { 1424 data[i-first] = 1.; 1425 } else { 1426 data[2*i-first] = 1./PetscSqrtReal(2.); 1427 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1428 } 1429 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1430 } 1431 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1432 } 1433 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1434 for (i=0;i<nvecs;i++) { /* reset vectors */ 1435 PetscInt first,last; 1436 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1437 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1438 if (i>=first && i < last) { 1439 PetscScalar *data; 1440 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1441 if (!has_const) { 1442 data[i-first] = 0.; 1443 } else { 1444 data[2*i-first] = 0.; 1445 data[2*i-first+1] = 0.; 1446 } 1447 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1448 } 1449 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1450 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1456 { 1457 Mat loc_divudotp; 1458 Vec p,v,vins,quad_vec,*quad_vecs; 1459 ISLocalToGlobalMapping map; 1460 PetscScalar *vals; 1461 const PetscScalar *array; 1462 PetscInt i,maxneighs,maxsize; 1463 PetscInt n_neigh,*neigh,*n_shared,**shared; 1464 PetscMPIInt rank; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1469 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1470 if (!maxneighs) { 1471 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1472 *nnsp = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 maxsize = 0; 1476 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1477 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1478 /* create vectors to hold quadrature weights */ 1479 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1480 if (!transpose) { 1481 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1484 } 1485 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1486 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1487 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<maxneighs;i++) { 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1491 } 1492 1493 /* compute local quad vec */ 1494 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1495 if (!transpose) { 1496 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1497 } else { 1498 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1499 } 1500 ierr = VecSet(p,1.);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1511 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1512 ierr = VecScatterCreateWithData(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1513 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1516 } else { 1517 vins = v; 1518 } 1519 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1520 ierr = VecDestroy(&p);CHKERRQ(ierr); 1521 1522 /* insert in global quadrature vecs */ 1523 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1524 for (i=0;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1532 idx = -(idx+1); 1533 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1534 } 1535 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1537 if (vl2l) { 1538 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1539 } 1540 ierr = VecDestroy(&v);CHKERRQ(ierr); 1541 ierr = PetscFree(vals);CHKERRQ(ierr); 1542 1543 /* assemble near null space */ 1544 for (i=0;i<maxneighs;i++) { 1545 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1546 } 1547 for (i=0;i<maxneighs;i++) { 1548 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1549 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1550 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1551 } 1552 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (primalv) { 1563 if (pcbddc->user_primal_vertices_local) { 1564 IS list[2], newp; 1565 1566 list[0] = primalv; 1567 list[1] = pcbddc->user_primal_vertices_local; 1568 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1569 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1570 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1571 pcbddc->user_primal_vertices_local = newp; 1572 } else { 1573 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1574 } 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1580 { 1581 PetscInt f, *comp = (PetscInt *)ctx; 1582 1583 PetscFunctionBegin; 1584 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1589 { 1590 PetscErrorCode ierr; 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1598 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1599 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1602 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1603 if (monolithic) { /* just get block size to properly compute vertices */ 1604 if (pcbddc->vertex_size == 1) { 1605 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1606 } 1607 goto boundary; 1608 } 1609 1610 if (pcbddc->user_provided_isfordofs) { 1611 if (pcbddc->n_ISForDofs) { 1612 PetscInt i; 1613 1614 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1615 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1616 PetscInt bs; 1617 1618 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1619 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1620 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1621 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1622 } 1623 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1624 pcbddc->n_ISForDofs = 0; 1625 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1626 } 1627 } else { 1628 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1629 DM dm; 1630 1631 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1632 if (!dm) { 1633 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1634 } 1635 if (dm) { 1636 IS *fields; 1637 PetscInt nf,i; 1638 1639 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1640 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1641 for (i=0;i<nf;i++) { 1642 PetscInt bs; 1643 1644 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1645 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1646 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1647 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1648 } 1649 ierr = PetscFree(fields);CHKERRQ(ierr); 1650 pcbddc->n_ISForDofsLocal = nf; 1651 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1652 PetscContainer c; 1653 1654 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1655 if (c) { 1656 MatISLocalFields lf; 1657 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1659 } else { /* fallback, create the default fields if bs > 1 */ 1660 PetscInt i, n = matis->A->rmap->n; 1661 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1662 if (i > 1) { 1663 pcbddc->n_ISForDofsLocal = i; 1664 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1665 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1666 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 } 1671 } else { 1672 PetscInt i; 1673 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1674 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1675 } 1676 } 1677 } 1678 1679 boundary: 1680 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1681 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1682 } else if (pcbddc->DirichletBoundariesLocal) { 1683 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1684 } 1685 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1687 } else if (pcbddc->NeumannBoundariesLocal) { 1688 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1689 } 1690 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1692 } 1693 ierr = VecDestroy(&global);CHKERRQ(ierr); 1694 ierr = VecDestroy(&local);CHKERRQ(ierr); 1695 /* detect local disconnected subdomains if requested (use matis->A) */ 1696 if (pcbddc->detect_disconnected) { 1697 IS primalv = NULL; 1698 PetscInt i; 1699 PetscBool filter = pcbddc->detect_disconnected_filter; 1700 1701 for (i=0;i<pcbddc->n_local_subs;i++) { 1702 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1703 } 1704 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1705 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1706 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1707 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1708 } 1709 /* early stage corner detection */ 1710 { 1711 DM dm; 1712 1713 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1714 if (dm) { 1715 PetscBool isda; 1716 1717 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1718 if (isda) { 1719 ISLocalToGlobalMapping l2l; 1720 IS corners; 1721 Mat lA; 1722 1723 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1724 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1725 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1726 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1727 if (l2l && corners) { 1728 const PetscInt *idx; 1729 PetscInt dof,bs,*idxout,n; 1730 1731 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1732 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1733 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1734 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1735 if (bs == dof) { 1736 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1737 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1738 } else { /* the original DMDA local-to-local map have been modified */ 1739 PetscInt i,d; 1740 1741 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1742 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1743 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1744 1745 bs = 1; 1746 n *= dof; 1747 } 1748 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1749 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1750 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1751 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1752 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1753 pcbddc->corner_selected = PETSC_TRUE; 1754 } else if (corners) { /* not from DMDA */ 1755 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1756 } 1757 } 1758 } 1759 } 1760 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1761 DM dm; 1762 1763 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1764 if (!dm) { 1765 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1766 } 1767 if (dm) { 1768 Vec vcoords; 1769 PetscSection section; 1770 PetscReal *coords; 1771 PetscInt d,cdim,nl,nf,**ctxs; 1772 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1773 1774 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1775 ierr = DMGetSection(dm,§ion);CHKERRQ(ierr); 1776 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1777 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1778 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1779 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1780 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1781 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1782 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1783 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1784 for (d=0;d<cdim;d++) { 1785 PetscInt i; 1786 const PetscScalar *v; 1787 1788 for (i=0;i<nf;i++) ctxs[i][0] = d; 1789 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1790 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1791 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1792 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1793 } 1794 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1795 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1796 ierr = PetscFree(coords);CHKERRQ(ierr); 1797 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1798 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1799 } 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1805 { 1806 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1807 PetscErrorCode ierr; 1808 IS nis; 1809 const PetscInt *idxs; 1810 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1811 PetscBool *ld; 1812 1813 PetscFunctionBegin; 1814 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1815 if (mop == MPI_LAND) { 1816 /* init rootdata with true */ 1817 ld = (PetscBool*) matis->sf_rootdata; 1818 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1819 } else { 1820 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1821 } 1822 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1823 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1824 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1825 ld = (PetscBool*) matis->sf_leafdata; 1826 for (i=0;i<nd;i++) 1827 if (-1 < idxs[i] && idxs[i] < n) 1828 ld[idxs[i]] = PETSC_TRUE; 1829 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1830 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1831 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1832 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1833 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1834 if (mop == MPI_LAND) { 1835 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1836 } else { 1837 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1838 } 1839 for (i=0,nnd=0;i<n;i++) 1840 if (ld[i]) 1841 nidxs[nnd++] = i; 1842 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1843 ierr = ISDestroy(is);CHKERRQ(ierr); 1844 *is = nis; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1849 { 1850 PC_IS *pcis = (PC_IS*)(pc->data); 1851 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1852 PetscErrorCode ierr; 1853 1854 PetscFunctionBegin; 1855 if (!pcbddc->benign_have_null) { 1856 PetscFunctionReturn(0); 1857 } 1858 if (pcbddc->ChangeOfBasisMatrix) { 1859 Vec swap; 1860 1861 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1862 swap = pcbddc->work_change; 1863 pcbddc->work_change = r; 1864 r = swap; 1865 } 1866 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1869 ierr = VecSet(z,0.);CHKERRQ(ierr); 1870 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1871 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1872 if (pcbddc->ChangeOfBasisMatrix) { 1873 pcbddc->work_change = r; 1874 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1875 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1876 } 1877 PetscFunctionReturn(0); 1878 } 1879 1880 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1881 { 1882 PCBDDCBenignMatMult_ctx ctx; 1883 PetscErrorCode ierr; 1884 PetscBool apply_right,apply_left,reset_x; 1885 1886 PetscFunctionBegin; 1887 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1888 if (transpose) { 1889 apply_right = ctx->apply_left; 1890 apply_left = ctx->apply_right; 1891 } else { 1892 apply_right = ctx->apply_right; 1893 apply_left = ctx->apply_left; 1894 } 1895 reset_x = PETSC_FALSE; 1896 if (apply_right) { 1897 const PetscScalar *ax; 1898 PetscInt nl,i; 1899 1900 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1901 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1902 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1903 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1904 for (i=0;i<ctx->benign_n;i++) { 1905 PetscScalar sum,val; 1906 const PetscInt *idxs; 1907 PetscInt nz,j; 1908 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1909 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1910 sum = 0.; 1911 if (ctx->apply_p0) { 1912 val = ctx->work[idxs[nz-1]]; 1913 for (j=0;j<nz-1;j++) { 1914 sum += ctx->work[idxs[j]]; 1915 ctx->work[idxs[j]] += val; 1916 } 1917 } else { 1918 for (j=0;j<nz-1;j++) { 1919 sum += ctx->work[idxs[j]]; 1920 } 1921 } 1922 ctx->work[idxs[nz-1]] -= sum; 1923 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1924 } 1925 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1926 reset_x = PETSC_TRUE; 1927 } 1928 if (transpose) { 1929 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1930 } else { 1931 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1932 } 1933 if (reset_x) { 1934 ierr = VecResetArray(x);CHKERRQ(ierr); 1935 } 1936 if (apply_left) { 1937 PetscScalar *ay; 1938 PetscInt i; 1939 1940 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1941 for (i=0;i<ctx->benign_n;i++) { 1942 PetscScalar sum,val; 1943 const PetscInt *idxs; 1944 PetscInt nz,j; 1945 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1946 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1947 val = -ay[idxs[nz-1]]; 1948 if (ctx->apply_p0) { 1949 sum = 0.; 1950 for (j=0;j<nz-1;j++) { 1951 sum += ay[idxs[j]]; 1952 ay[idxs[j]] += val; 1953 } 1954 ay[idxs[nz-1]] += sum; 1955 } else { 1956 for (j=0;j<nz-1;j++) { 1957 ay[idxs[j]] += val; 1958 } 1959 ay[idxs[nz-1]] = 0.; 1960 } 1961 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1962 } 1963 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1964 } 1965 PetscFunctionReturn(0); 1966 } 1967 1968 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1969 { 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1978 { 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1987 { 1988 PC_IS *pcis = (PC_IS*)pc->data; 1989 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1990 PCBDDCBenignMatMult_ctx ctx; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 if (!restore) { 1995 Mat A_IB,A_BI; 1996 PetscScalar *work; 1997 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1998 1999 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2000 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2001 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2002 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2003 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2004 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2005 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2006 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2007 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2008 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2009 ctx->apply_left = PETSC_TRUE; 2010 ctx->apply_right = PETSC_FALSE; 2011 ctx->apply_p0 = PETSC_FALSE; 2012 ctx->benign_n = pcbddc->benign_n; 2013 if (reuse) { 2014 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2015 ctx->free = PETSC_FALSE; 2016 } else { /* TODO: could be optimized for successive solves */ 2017 ISLocalToGlobalMapping N_to_D; 2018 PetscInt i; 2019 2020 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2021 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2022 for (i=0;i<pcbddc->benign_n;i++) { 2023 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2024 } 2025 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2026 ctx->free = PETSC_TRUE; 2027 } 2028 ctx->A = pcis->A_IB; 2029 ctx->work = work; 2030 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2031 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 pcis->A_IB = A_IB; 2034 2035 /* A_BI as A_IB^T */ 2036 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2037 pcbddc->benign_original_mat = pcis->A_BI; 2038 pcis->A_BI = A_BI; 2039 } else { 2040 if (!pcbddc->benign_original_mat) { 2041 PetscFunctionReturn(0); 2042 } 2043 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2044 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2045 pcis->A_IB = ctx->A; 2046 ctx->A = NULL; 2047 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2048 pcis->A_BI = pcbddc->benign_original_mat; 2049 pcbddc->benign_original_mat = NULL; 2050 if (ctx->free) { 2051 PetscInt i; 2052 for (i=0;i<ctx->benign_n;i++) { 2053 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2054 } 2055 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2056 } 2057 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2058 ierr = PetscFree(ctx);CHKERRQ(ierr); 2059 } 2060 PetscFunctionReturn(0); 2061 } 2062 2063 /* used just in bddc debug mode */ 2064 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2065 { 2066 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2067 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2068 Mat An; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2073 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2074 if (is1) { 2075 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2076 ierr = MatDestroy(&An);CHKERRQ(ierr); 2077 } else { 2078 *B = An; 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 /* TODO: add reuse flag */ 2084 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2085 { 2086 Mat Bt; 2087 PetscScalar *a,*bdata; 2088 const PetscInt *ii,*ij; 2089 PetscInt m,n,i,nnz,*bii,*bij; 2090 PetscBool flg_row; 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2095 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2096 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2097 nnz = n; 2098 for (i=0;i<ii[n];i++) { 2099 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2100 } 2101 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2102 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2103 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2104 nnz = 0; 2105 bii[0] = 0; 2106 for (i=0;i<n;i++) { 2107 PetscInt j; 2108 for (j=ii[i];j<ii[i+1];j++) { 2109 PetscScalar entry = a[j]; 2110 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2111 bij[nnz] = ij[j]; 2112 bdata[nnz] = entry; 2113 nnz++; 2114 } 2115 } 2116 bii[i+1] = nnz; 2117 } 2118 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2119 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2120 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2121 { 2122 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2123 b->free_a = PETSC_TRUE; 2124 b->free_ij = PETSC_TRUE; 2125 } 2126 if (*B == A) { 2127 ierr = MatDestroy(&A);CHKERRQ(ierr); 2128 } 2129 *B = Bt; 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2134 { 2135 Mat B = NULL; 2136 DM dm; 2137 IS is_dummy,*cc_n; 2138 ISLocalToGlobalMapping l2gmap_dummy; 2139 PCBDDCGraph graph; 2140 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2141 PetscInt i,n; 2142 PetscInt *xadj,*adjncy; 2143 PetscBool isplex = PETSC_FALSE; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 if (ncc) *ncc = 0; 2148 if (cc) *cc = NULL; 2149 if (primalv) *primalv = NULL; 2150 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2151 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2152 if (!dm) { 2153 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2154 } 2155 if (dm) { 2156 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2157 } 2158 if (filter) isplex = PETSC_FALSE; 2159 2160 if (isplex) { /* this code has been modified from plexpartition.c */ 2161 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2162 PetscInt *adj = NULL; 2163 IS cellNumbering; 2164 const PetscInt *cellNum; 2165 PetscBool useCone, useClosure; 2166 PetscSection section; 2167 PetscSegBuffer adjBuffer; 2168 PetscSF sfPoint; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2173 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2174 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2175 /* Build adjacency graph via a section/segbuffer */ 2176 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2177 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2178 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2179 /* Always use FVM adjacency to create partitioner graph */ 2180 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2181 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2182 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2183 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2184 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2185 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2186 for (n = 0, p = pStart; p < pEnd; p++) { 2187 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2188 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2189 adjSize = PETSC_DETERMINE; 2190 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2191 for (a = 0; a < adjSize; ++a) { 2192 const PetscInt point = adj[a]; 2193 if (pStart <= point && point < pEnd) { 2194 PetscInt *PETSC_RESTRICT pBuf; 2195 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2196 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2197 *pBuf = point; 2198 } 2199 } 2200 n++; 2201 } 2202 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2203 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2204 /* Derive CSR graph from section/segbuffer */ 2205 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2206 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2207 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2208 for (idx = 0, p = pStart; p < pEnd; p++) { 2209 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2210 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2211 } 2212 xadj[n] = size; 2213 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2214 /* Clean up */ 2215 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2216 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2217 ierr = PetscFree(adj);CHKERRQ(ierr); 2218 graph->xadj = xadj; 2219 graph->adjncy = adjncy; 2220 } else { 2221 Mat A; 2222 PetscBool isseqaij, flg_row; 2223 2224 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2225 if (!A->rmap->N || !A->cmap->N) { 2226 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2227 PetscFunctionReturn(0); 2228 } 2229 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2230 if (!isseqaij && filter) { 2231 PetscBool isseqdense; 2232 2233 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2234 if (!isseqdense) { 2235 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2236 } else { /* TODO: rectangular case and LDA */ 2237 PetscScalar *array; 2238 PetscReal chop=1.e-6; 2239 2240 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2241 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2242 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2243 for (i=0;i<n;i++) { 2244 PetscInt j; 2245 for (j=i+1;j<n;j++) { 2246 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2247 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2248 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2249 } 2250 } 2251 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2252 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2253 } 2254 } else { 2255 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2256 B = A; 2257 } 2258 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2259 2260 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2261 if (filter) { 2262 PetscScalar *data; 2263 PetscInt j,cum; 2264 2265 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2266 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2267 cum = 0; 2268 for (i=0;i<n;i++) { 2269 PetscInt t; 2270 2271 for (j=xadj[i];j<xadj[i+1];j++) { 2272 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2273 continue; 2274 } 2275 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2276 } 2277 t = xadj_filtered[i]; 2278 xadj_filtered[i] = cum; 2279 cum += t; 2280 } 2281 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2282 graph->xadj = xadj_filtered; 2283 graph->adjncy = adjncy_filtered; 2284 } else { 2285 graph->xadj = xadj; 2286 graph->adjncy = adjncy; 2287 } 2288 } 2289 /* compute local connected components using PCBDDCGraph */ 2290 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2291 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2292 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2293 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2294 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2295 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2296 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2297 2298 /* partial clean up */ 2299 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2300 if (B) { 2301 PetscBool flg_row; 2302 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2303 ierr = MatDestroy(&B);CHKERRQ(ierr); 2304 } 2305 if (isplex) { 2306 ierr = PetscFree(xadj);CHKERRQ(ierr); 2307 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2308 } 2309 2310 /* get back data */ 2311 if (isplex) { 2312 if (ncc) *ncc = graph->ncc; 2313 if (cc || primalv) { 2314 Mat A; 2315 PetscBT btv,btvt; 2316 PetscSection subSection; 2317 PetscInt *ids,cum,cump,*cids,*pids; 2318 2319 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2320 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2321 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2322 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2323 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2324 2325 cids[0] = 0; 2326 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2327 PetscInt j; 2328 2329 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2330 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2331 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2332 2333 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2334 for (k = 0; k < 2*size; k += 2) { 2335 PetscInt s, p = closure[k], off, dof, cdof; 2336 2337 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2338 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2339 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2340 for (s = 0; s < dof-cdof; s++) { 2341 if (PetscBTLookupSet(btvt,off+s)) continue; 2342 if (!PetscBTLookup(btv,off+s)) { 2343 ids[cum++] = off+s; 2344 } else { /* cross-vertex */ 2345 pids[cump++] = off+s; 2346 } 2347 } 2348 } 2349 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2350 } 2351 cids[i+1] = cum; 2352 /* mark dofs as already assigned */ 2353 for (j = cids[i]; j < cids[i+1]; j++) { 2354 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2355 } 2356 } 2357 if (cc) { 2358 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2359 for (i = 0; i < graph->ncc; i++) { 2360 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2361 } 2362 *cc = cc_n; 2363 } 2364 if (primalv) { 2365 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2366 } 2367 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2368 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2369 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2370 } 2371 } else { 2372 if (ncc) *ncc = graph->ncc; 2373 if (cc) { 2374 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2375 for (i=0;i<graph->ncc;i++) { 2376 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2377 } 2378 *cc = cc_n; 2379 } 2380 } 2381 /* clean up graph */ 2382 graph->xadj = 0; 2383 graph->adjncy = 0; 2384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2389 { 2390 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2391 PC_IS* pcis = (PC_IS*)(pc->data); 2392 IS dirIS = NULL; 2393 PetscInt i; 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2398 if (zerodiag) { 2399 Mat A; 2400 Vec vec3_N; 2401 PetscScalar *vals; 2402 const PetscInt *idxs; 2403 PetscInt nz,*count; 2404 2405 /* p0 */ 2406 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2407 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2408 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2409 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2410 for (i=0;i<nz;i++) vals[i] = 1.; 2411 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2412 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2413 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2414 /* v_I */ 2415 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2416 for (i=0;i<nz;i++) vals[i] = 0.; 2417 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2418 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2419 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2420 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2421 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2422 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2423 if (dirIS) { 2424 PetscInt n; 2425 2426 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2427 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2428 for (i=0;i<n;i++) vals[i] = 0.; 2429 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2430 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2431 } 2432 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2433 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2434 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2435 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2436 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2437 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2438 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2439 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2440 ierr = PetscFree(vals);CHKERRQ(ierr); 2441 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2442 2443 /* there should not be any pressure dofs lying on the interface */ 2444 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2445 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2446 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2447 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2450 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2451 ierr = PetscFree(count);CHKERRQ(ierr); 2452 } 2453 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2454 2455 /* check PCBDDCBenignGetOrSetP0 */ 2456 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2457 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2458 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2460 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->benign_n;i++) { 2462 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2463 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2469 { 2470 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2471 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2472 PetscInt nz,n,benign_n,bsp = 1; 2473 PetscInt *interior_dofs,n_interior_dofs,nneu; 2474 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 if (reuse) goto project_b0; 2479 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2480 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2481 for (n=0;n<pcbddc->benign_n;n++) { 2482 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2483 } 2484 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2485 has_null_pressures = PETSC_TRUE; 2486 have_null = PETSC_TRUE; 2487 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2488 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2489 Checks if all the pressure dofs in each subdomain have a zero diagonal 2490 If not, a change of basis on pressures is not needed 2491 since the local Schur complements are already SPD 2492 */ 2493 if (pcbddc->n_ISForDofsLocal) { 2494 IS iP = NULL; 2495 PetscInt p,*pp; 2496 PetscBool flg; 2497 2498 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2499 n = pcbddc->n_ISForDofsLocal; 2500 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2501 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2502 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2503 if (!flg) { 2504 n = 1; 2505 pp[0] = pcbddc->n_ISForDofsLocal-1; 2506 } 2507 2508 bsp = 0; 2509 for (p=0;p<n;p++) { 2510 PetscInt bs; 2511 2512 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2513 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2514 bsp += bs; 2515 } 2516 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2517 bsp = 0; 2518 for (p=0;p<n;p++) { 2519 const PetscInt *idxs; 2520 PetscInt b,bs,npl,*bidxs; 2521 2522 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2523 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2524 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2525 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2526 for (b=0;b<bs;b++) { 2527 PetscInt i; 2528 2529 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2530 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2531 bsp++; 2532 } 2533 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2534 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2535 } 2536 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2537 2538 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2539 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2540 if (iP) { 2541 IS newpressures; 2542 2543 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2544 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2545 pressures = newpressures; 2546 } 2547 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2548 if (!sorted) { 2549 ierr = ISSort(pressures);CHKERRQ(ierr); 2550 } 2551 ierr = PetscFree(pp);CHKERRQ(ierr); 2552 } 2553 2554 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2555 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2556 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2557 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2558 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2559 if (!sorted) { 2560 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2561 } 2562 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2563 zerodiag_save = zerodiag; 2564 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2565 if (!nz) { 2566 if (n) have_null = PETSC_FALSE; 2567 has_null_pressures = PETSC_FALSE; 2568 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2569 } 2570 recompute_zerodiag = PETSC_FALSE; 2571 2572 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2573 zerodiag_subs = NULL; 2574 benign_n = 0; 2575 n_interior_dofs = 0; 2576 interior_dofs = NULL; 2577 nneu = 0; 2578 if (pcbddc->NeumannBoundariesLocal) { 2579 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2580 } 2581 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2582 if (checkb) { /* need to compute interior nodes */ 2583 PetscInt n,i,j; 2584 PetscInt n_neigh,*neigh,*n_shared,**shared; 2585 PetscInt *iwork; 2586 2587 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2588 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2589 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2590 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2591 for (i=1;i<n_neigh;i++) 2592 for (j=0;j<n_shared[i];j++) 2593 iwork[shared[i][j]] += 1; 2594 for (i=0;i<n;i++) 2595 if (!iwork[i]) 2596 interior_dofs[n_interior_dofs++] = i; 2597 ierr = PetscFree(iwork);CHKERRQ(ierr); 2598 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2599 } 2600 if (has_null_pressures) { 2601 IS *subs; 2602 PetscInt nsubs,i,j,nl; 2603 const PetscInt *idxs; 2604 PetscScalar *array; 2605 Vec *work; 2606 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2607 2608 subs = pcbddc->local_subs; 2609 nsubs = pcbddc->n_local_subs; 2610 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2611 if (checkb) { 2612 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2613 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2614 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2615 /* work[0] = 1_p */ 2616 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2617 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2618 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2619 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2620 /* work[0] = 1_v */ 2621 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2622 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2623 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2624 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2625 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2626 } 2627 2628 if (nsubs > 1 || bsp > 1) { 2629 IS *is; 2630 PetscInt b,totb; 2631 2632 totb = bsp; 2633 is = bsp > 1 ? bzerodiag : &zerodiag; 2634 nsubs = PetscMax(nsubs,1); 2635 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2636 for (b=0;b<totb;b++) { 2637 for (i=0;i<nsubs;i++) { 2638 ISLocalToGlobalMapping l2g; 2639 IS t_zerodiag_subs; 2640 PetscInt nl; 2641 2642 if (subs) { 2643 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2644 } else { 2645 IS tis; 2646 2647 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2648 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2649 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2650 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2651 } 2652 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2653 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2654 if (nl) { 2655 PetscBool valid = PETSC_TRUE; 2656 2657 if (checkb) { 2658 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2659 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2660 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2661 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2662 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2663 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2664 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2665 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2666 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2667 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2668 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2669 for (j=0;j<n_interior_dofs;j++) { 2670 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2671 valid = PETSC_FALSE; 2672 break; 2673 } 2674 } 2675 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2676 } 2677 if (valid && nneu) { 2678 const PetscInt *idxs; 2679 PetscInt nzb; 2680 2681 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2682 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2683 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2684 if (nzb) valid = PETSC_FALSE; 2685 } 2686 if (valid && pressures) { 2687 IS t_pressure_subs,tmp; 2688 PetscInt i1,i2; 2689 2690 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2691 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2692 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2693 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2694 if (i2 != i1) valid = PETSC_FALSE; 2695 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2696 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2697 } 2698 if (valid) { 2699 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2700 benign_n++; 2701 } else recompute_zerodiag = PETSC_TRUE; 2702 } 2703 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2704 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2705 } 2706 } 2707 } else { /* there's just one subdomain (or zero if they have not been detected */ 2708 PetscBool valid = PETSC_TRUE; 2709 2710 if (nneu) valid = PETSC_FALSE; 2711 if (valid && pressures) { 2712 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2713 } 2714 if (valid && checkb) { 2715 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2716 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2717 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2718 for (j=0;j<n_interior_dofs;j++) { 2719 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2720 valid = PETSC_FALSE; 2721 break; 2722 } 2723 } 2724 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2725 } 2726 if (valid) { 2727 benign_n = 1; 2728 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2729 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2730 zerodiag_subs[0] = zerodiag; 2731 } 2732 } 2733 if (checkb) { 2734 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2735 } 2736 } 2737 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2738 2739 if (!benign_n) { 2740 PetscInt n; 2741 2742 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2743 recompute_zerodiag = PETSC_FALSE; 2744 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2745 if (n) { 2746 has_null_pressures = PETSC_FALSE; 2747 have_null = PETSC_FALSE; 2748 } 2749 } 2750 2751 /* final check for null pressures */ 2752 if (zerodiag && pressures) { 2753 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2754 } 2755 2756 if (recompute_zerodiag) { 2757 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2758 if (benign_n == 1) { 2759 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2760 zerodiag = zerodiag_subs[0]; 2761 } else { 2762 PetscInt i,nzn,*new_idxs; 2763 2764 nzn = 0; 2765 for (i=0;i<benign_n;i++) { 2766 PetscInt ns; 2767 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2768 nzn += ns; 2769 } 2770 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2771 nzn = 0; 2772 for (i=0;i<benign_n;i++) { 2773 PetscInt ns,*idxs; 2774 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2775 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2776 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2777 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2778 nzn += ns; 2779 } 2780 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2781 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2782 } 2783 have_null = PETSC_FALSE; 2784 } 2785 2786 /* determines if the coarse solver will be singular or not */ 2787 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2788 2789 /* Prepare matrix to compute no-net-flux */ 2790 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2791 Mat A,loc_divudotp; 2792 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2793 IS row,col,isused = NULL; 2794 PetscInt M,N,n,st,n_isused; 2795 2796 if (pressures) { 2797 isused = pressures; 2798 } else { 2799 isused = zerodiag_save; 2800 } 2801 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2802 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2803 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2804 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2805 n_isused = 0; 2806 if (isused) { 2807 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2808 } 2809 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2810 st = st-n_isused; 2811 if (n) { 2812 const PetscInt *gidxs; 2813 2814 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2816 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2817 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2818 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2819 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2820 } else { 2821 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2822 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2823 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2824 } 2825 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2826 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2827 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2828 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2829 ierr = ISDestroy(&row);CHKERRQ(ierr); 2830 ierr = ISDestroy(&col);CHKERRQ(ierr); 2831 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2832 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2833 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2834 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2835 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2836 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2837 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2838 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2839 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2840 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2841 } 2842 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2843 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2844 if (bzerodiag) { 2845 PetscInt i; 2846 2847 for (i=0;i<bsp;i++) { 2848 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2849 } 2850 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2851 } 2852 pcbddc->benign_n = benign_n; 2853 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2854 2855 /* determines if the problem has subdomains with 0 pressure block */ 2856 have_null = (PetscBool)(!!pcbddc->benign_n); 2857 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2858 2859 project_b0: 2860 /* change of basis and p0 dofs */ 2861 if (pcbddc->benign_n) { 2862 IS zerodiagc; 2863 const PetscInt *idxs,*idxsc; 2864 PetscInt i,s,*nnz; 2865 2866 if (!zerodiag) { 2867 ierr = ISConcatenate(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_zerodiag_subs,&zerodiag);CHKERRQ(ierr); 2868 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2869 } 2870 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2871 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2872 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2873 /* local change of basis for pressures */ 2874 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2875 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2876 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2877 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2878 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2879 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2880 for (i=0;i<pcbddc->benign_n;i++) { 2881 PetscInt nzs,j; 2882 2883 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2884 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2885 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2886 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2887 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2888 } 2889 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2890 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2891 ierr = PetscFree(nnz);CHKERRQ(ierr); 2892 /* set identity on velocities */ 2893 for (i=0;i<n-nz;i++) { 2894 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2895 } 2896 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2897 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2898 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2899 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2900 /* set change on pressures */ 2901 for (s=0;s<pcbddc->benign_n;s++) { 2902 PetscScalar *array; 2903 PetscInt nzs; 2904 2905 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2906 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2907 for (i=0;i<nzs-1;i++) { 2908 PetscScalar vals[2]; 2909 PetscInt cols[2]; 2910 2911 cols[0] = idxs[i]; 2912 cols[1] = idxs[nzs-1]; 2913 vals[0] = 1.; 2914 vals[1] = 1.; 2915 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2916 } 2917 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2918 for (i=0;i<nzs-1;i++) array[i] = -1.; 2919 array[nzs-1] = 1.; 2920 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2921 /* store local idxs for p0 */ 2922 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2923 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2924 ierr = PetscFree(array);CHKERRQ(ierr); 2925 } 2926 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2927 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2928 2929 /* project if needed */ 2930 if (pcbddc->benign_change_explicit) { 2931 Mat M; 2932 2933 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2934 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2935 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2936 ierr = MatDestroy(&M);CHKERRQ(ierr); 2937 } 2938 /* store global idxs for p0 */ 2939 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2940 } 2941 *zerodiaglocal = zerodiag; 2942 PetscFunctionReturn(0); 2943 } 2944 2945 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2946 { 2947 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2948 PetscScalar *array; 2949 PetscErrorCode ierr; 2950 2951 PetscFunctionBegin; 2952 if (!pcbddc->benign_sf) { 2953 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2954 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2955 } 2956 if (get) { 2957 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2958 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2959 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2960 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2961 } else { 2962 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2963 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2964 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2965 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2966 } 2967 PetscFunctionReturn(0); 2968 } 2969 2970 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2971 { 2972 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2973 PetscErrorCode ierr; 2974 2975 PetscFunctionBegin; 2976 /* TODO: add error checking 2977 - avoid nested pop (or push) calls. 2978 - cannot push before pop. 2979 - cannot call this if pcbddc->local_mat is NULL 2980 */ 2981 if (!pcbddc->benign_n) { 2982 PetscFunctionReturn(0); 2983 } 2984 if (pop) { 2985 if (pcbddc->benign_change_explicit) { 2986 IS is_p0; 2987 MatReuse reuse; 2988 2989 /* extract B_0 */ 2990 reuse = MAT_INITIAL_MATRIX; 2991 if (pcbddc->benign_B0) { 2992 reuse = MAT_REUSE_MATRIX; 2993 } 2994 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2995 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2996 /* remove rows and cols from local problem */ 2997 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2998 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2999 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 3000 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3001 } else { 3002 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 3003 PetscScalar *vals; 3004 PetscInt i,n,*idxs_ins; 3005 3006 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 3007 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 3008 if (!pcbddc->benign_B0) { 3009 PetscInt *nnz; 3010 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 3011 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 3012 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3013 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3014 for (i=0;i<pcbddc->benign_n;i++) { 3015 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3016 nnz[i] = n - nnz[i]; 3017 } 3018 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3019 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3020 ierr = PetscFree(nnz);CHKERRQ(ierr); 3021 } 3022 3023 for (i=0;i<pcbddc->benign_n;i++) { 3024 PetscScalar *array; 3025 PetscInt *idxs,j,nz,cum; 3026 3027 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3028 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3029 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3030 for (j=0;j<nz;j++) vals[j] = 1.; 3031 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3032 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3033 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3034 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3035 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3036 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3037 cum = 0; 3038 for (j=0;j<n;j++) { 3039 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3040 vals[cum] = array[j]; 3041 idxs_ins[cum] = j; 3042 cum++; 3043 } 3044 } 3045 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3046 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3047 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3048 } 3049 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3050 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3051 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3052 } 3053 } else { /* push */ 3054 if (pcbddc->benign_change_explicit) { 3055 PetscInt i; 3056 3057 for (i=0;i<pcbddc->benign_n;i++) { 3058 PetscScalar *B0_vals; 3059 PetscInt *B0_cols,B0_ncol; 3060 3061 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3062 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3063 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3064 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3065 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3066 } 3067 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3068 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3069 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3070 } 3071 PetscFunctionReturn(0); 3072 } 3073 3074 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3075 { 3076 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3077 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3078 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3079 PetscBLASInt *B_iwork,*B_ifail; 3080 PetscScalar *work,lwork; 3081 PetscScalar *St,*S,*eigv; 3082 PetscScalar *Sarray,*Starray; 3083 PetscReal *eigs,thresh,lthresh,uthresh; 3084 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3085 PetscBool allocated_S_St; 3086 #if defined(PETSC_USE_COMPLEX) 3087 PetscReal *rwork; 3088 #endif 3089 PetscErrorCode ierr; 3090 3091 PetscFunctionBegin; 3092 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3093 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3094 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3095 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3096 3097 if (pcbddc->dbg_flag) { 3098 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3100 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3101 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3102 } 3103 3104 if (pcbddc->dbg_flag) { 3105 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3106 } 3107 3108 /* max size of subsets */ 3109 mss = 0; 3110 for (i=0;i<sub_schurs->n_subs;i++) { 3111 PetscInt subset_size; 3112 3113 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3114 mss = PetscMax(mss,subset_size); 3115 } 3116 3117 /* min/max and threshold */ 3118 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3119 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3120 nmax = PetscMax(nmin,nmax); 3121 allocated_S_St = PETSC_FALSE; 3122 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3123 allocated_S_St = PETSC_TRUE; 3124 } 3125 3126 /* allocate lapack workspace */ 3127 cum = cum2 = 0; 3128 maxneigs = 0; 3129 for (i=0;i<sub_schurs->n_subs;i++) { 3130 PetscInt n,subset_size; 3131 3132 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3133 n = PetscMin(subset_size,nmax); 3134 cum += subset_size; 3135 cum2 += subset_size*n; 3136 maxneigs = PetscMax(maxneigs,n); 3137 } 3138 if (mss) { 3139 if (sub_schurs->is_symmetric) { 3140 PetscBLASInt B_itype = 1; 3141 PetscBLASInt B_N = mss; 3142 PetscReal zero = 0.0; 3143 PetscReal eps = 0.0; /* dlamch? */ 3144 3145 B_lwork = -1; 3146 S = NULL; 3147 St = NULL; 3148 eigs = NULL; 3149 eigv = NULL; 3150 B_iwork = NULL; 3151 B_ifail = NULL; 3152 #if defined(PETSC_USE_COMPLEX) 3153 rwork = NULL; 3154 #endif 3155 thresh = 1.0; 3156 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3157 #if defined(PETSC_USE_COMPLEX) 3158 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3159 #else 3160 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3161 #endif 3162 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3163 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3164 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3165 } else { 3166 lwork = 0; 3167 } 3168 3169 nv = 0; 3170 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3171 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3172 } 3173 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3174 if (allocated_S_St) { 3175 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3176 } 3177 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3178 #if defined(PETSC_USE_COMPLEX) 3179 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3180 #endif 3181 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3182 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3183 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3184 nv+cum,&pcbddc->adaptive_constraints_idxs, 3185 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3186 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3187 3188 maxneigs = 0; 3189 cum = cumarray = 0; 3190 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3191 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3192 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3193 const PetscInt *idxs; 3194 3195 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3196 for (cum=0;cum<nv;cum++) { 3197 pcbddc->adaptive_constraints_n[cum] = 1; 3198 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3199 pcbddc->adaptive_constraints_data[cum] = 1.0; 3200 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3201 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3202 } 3203 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3204 } 3205 3206 if (mss) { /* multilevel */ 3207 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3208 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3209 } 3210 3211 lthresh = pcbddc->adaptive_threshold[0]; 3212 uthresh = pcbddc->adaptive_threshold[1]; 3213 for (i=0;i<sub_schurs->n_subs;i++) { 3214 const PetscInt *idxs; 3215 PetscReal upper,lower; 3216 PetscInt j,subset_size,eigs_start = 0; 3217 PetscBLASInt B_N; 3218 PetscBool same_data = PETSC_FALSE; 3219 PetscBool scal = PETSC_FALSE; 3220 3221 if (pcbddc->use_deluxe_scaling) { 3222 upper = PETSC_MAX_REAL; 3223 lower = uthresh; 3224 } else { 3225 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3226 upper = 1./uthresh; 3227 lower = 0.; 3228 } 3229 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3230 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3231 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3232 /* this is experimental: we assume the dofs have been properly grouped to have 3233 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3234 if (!sub_schurs->is_posdef) { 3235 Mat T; 3236 3237 for (j=0;j<subset_size;j++) { 3238 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3239 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3240 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3241 ierr = MatDestroy(&T);CHKERRQ(ierr); 3242 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3243 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3244 ierr = MatDestroy(&T);CHKERRQ(ierr); 3245 if (sub_schurs->change_primal_sub) { 3246 PetscInt nz,k; 3247 const PetscInt *idxs; 3248 3249 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3250 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3251 for (k=0;k<nz;k++) { 3252 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3253 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3254 } 3255 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3256 } 3257 scal = PETSC_TRUE; 3258 break; 3259 } 3260 } 3261 } 3262 3263 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3264 if (sub_schurs->is_symmetric) { 3265 PetscInt j,k; 3266 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3267 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3268 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3269 } 3270 for (j=0;j<subset_size;j++) { 3271 for (k=j;k<subset_size;k++) { 3272 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3273 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3274 } 3275 } 3276 } else { 3277 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3278 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3279 } 3280 } else { 3281 S = Sarray + cumarray; 3282 St = Starray + cumarray; 3283 } 3284 /* see if we can save some work */ 3285 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3286 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3287 } 3288 3289 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3290 B_neigs = 0; 3291 } else { 3292 if (sub_schurs->is_symmetric) { 3293 PetscBLASInt B_itype = 1; 3294 PetscBLASInt B_IL, B_IU; 3295 PetscReal eps = -1.0; /* dlamch? */ 3296 PetscInt nmin_s; 3297 PetscBool compute_range; 3298 3299 B_neigs = 0; 3300 compute_range = (PetscBool)!same_data; 3301 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3302 3303 if (pcbddc->dbg_flag) { 3304 PetscInt nc = 0; 3305 3306 if (sub_schurs->change_primal_sub) { 3307 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3308 } 3309 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3310 } 3311 3312 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3313 if (compute_range) { 3314 3315 /* ask for eigenvalues larger than thresh */ 3316 if (sub_schurs->is_posdef) { 3317 #if defined(PETSC_USE_COMPLEX) 3318 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3319 #else 3320 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3321 #endif 3322 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3323 } else { /* no theory so far, but it works nicely */ 3324 PetscInt recipe = 0,recipe_m = 1; 3325 PetscReal bb[2]; 3326 3327 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3328 switch (recipe) { 3329 case 0: 3330 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3331 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3332 #if defined(PETSC_USE_COMPLEX) 3333 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3334 #else 3335 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3336 #endif 3337 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3338 break; 3339 case 1: 3340 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3341 #if defined(PETSC_USE_COMPLEX) 3342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3343 #else 3344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3345 #endif 3346 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3347 if (!scal) { 3348 PetscBLASInt B_neigs2 = 0; 3349 3350 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3351 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3352 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3355 #else 3356 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3357 #endif 3358 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3359 B_neigs += B_neigs2; 3360 } 3361 break; 3362 case 2: 3363 if (scal) { 3364 bb[0] = PETSC_MIN_REAL; 3365 bb[1] = 0; 3366 #if defined(PETSC_USE_COMPLEX) 3367 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3368 #else 3369 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3370 #endif 3371 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3372 } else { 3373 PetscBLASInt B_neigs2 = 0; 3374 PetscBool import = PETSC_FALSE; 3375 3376 lthresh = PetscMax(lthresh,0.0); 3377 if (lthresh > 0.0) { 3378 bb[0] = PETSC_MIN_REAL; 3379 bb[1] = lthresh*lthresh; 3380 3381 import = PETSC_TRUE; 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3384 #else 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3386 #endif 3387 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3388 } 3389 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3390 bb[1] = PETSC_MAX_REAL; 3391 if (import) { 3392 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3393 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3394 } 3395 #if defined(PETSC_USE_COMPLEX) 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3397 #else 3398 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3399 #endif 3400 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3401 B_neigs += B_neigs2; 3402 } 3403 break; 3404 case 3: 3405 if (scal) { 3406 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3407 } else { 3408 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3409 } 3410 if (!scal) { 3411 bb[0] = uthresh; 3412 bb[1] = PETSC_MAX_REAL; 3413 #if defined(PETSC_USE_COMPLEX) 3414 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3415 #else 3416 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3417 #endif 3418 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3419 } 3420 if (recipe_m > 0 && B_N - B_neigs > 0) { 3421 PetscBLASInt B_neigs2 = 0; 3422 3423 B_IL = 1; 3424 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3425 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3426 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3427 #if defined(PETSC_USE_COMPLEX) 3428 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3429 #else 3430 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3431 #endif 3432 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3433 B_neigs += B_neigs2; 3434 } 3435 break; 3436 case 4: 3437 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3438 #if defined(PETSC_USE_COMPLEX) 3439 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3440 #else 3441 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3442 #endif 3443 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3444 { 3445 PetscBLASInt B_neigs2 = 0; 3446 3447 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3448 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3449 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3450 #if defined(PETSC_USE_COMPLEX) 3451 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3452 #else 3453 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3454 #endif 3455 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3456 B_neigs += B_neigs2; 3457 } 3458 break; 3459 case 5: /* same as before: first compute all eigenvalues, then filter */ 3460 #if defined(PETSC_USE_COMPLEX) 3461 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3462 #else 3463 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3464 #endif 3465 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3466 { 3467 PetscInt e,k,ne; 3468 for (e=0,ne=0;e<B_neigs;e++) { 3469 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3470 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3471 eigs[ne] = eigs[e]; 3472 ne++; 3473 } 3474 } 3475 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3476 B_neigs = ne; 3477 } 3478 break; 3479 default: 3480 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3481 break; 3482 } 3483 } 3484 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3485 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3486 B_IL = 1; 3487 #if defined(PETSC_USE_COMPLEX) 3488 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3489 #else 3490 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3491 #endif 3492 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3493 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3494 PetscInt k; 3495 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3496 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3497 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3498 nmin = nmax; 3499 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3500 for (k=0;k<nmax;k++) { 3501 eigs[k] = 1./PETSC_SMALL; 3502 eigv[k*(subset_size+1)] = 1.0; 3503 } 3504 } 3505 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3506 if (B_ierr) { 3507 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3508 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3509 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3510 } 3511 3512 if (B_neigs > nmax) { 3513 if (pcbddc->dbg_flag) { 3514 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3515 } 3516 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3517 B_neigs = nmax; 3518 } 3519 3520 nmin_s = PetscMin(nmin,B_N); 3521 if (B_neigs < nmin_s) { 3522 PetscBLASInt B_neigs2 = 0; 3523 3524 if (pcbddc->use_deluxe_scaling) { 3525 if (scal) { 3526 B_IU = nmin_s; 3527 B_IL = B_neigs + 1; 3528 } else { 3529 B_IL = B_N - nmin_s + 1; 3530 B_IU = B_N - B_neigs; 3531 } 3532 } else { 3533 B_IL = B_neigs + 1; 3534 B_IU = nmin_s; 3535 } 3536 if (pcbddc->dbg_flag) { 3537 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3538 } 3539 if (sub_schurs->is_symmetric) { 3540 PetscInt j,k; 3541 for (j=0;j<subset_size;j++) { 3542 for (k=j;k<subset_size;k++) { 3543 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3544 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3545 } 3546 } 3547 } else { 3548 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3549 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3550 } 3551 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3552 #if defined(PETSC_USE_COMPLEX) 3553 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3554 #else 3555 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3556 #endif 3557 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3558 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3559 B_neigs += B_neigs2; 3560 } 3561 if (B_ierr) { 3562 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3563 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3564 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3565 } 3566 if (pcbddc->dbg_flag) { 3567 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3568 for (j=0;j<B_neigs;j++) { 3569 if (eigs[j] == 0.0) { 3570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3571 } else { 3572 if (pcbddc->use_deluxe_scaling) { 3573 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3574 } else { 3575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3576 } 3577 } 3578 } 3579 } 3580 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3581 } 3582 /* change the basis back to the original one */ 3583 if (sub_schurs->change) { 3584 Mat change,phi,phit; 3585 3586 if (pcbddc->dbg_flag > 2) { 3587 PetscInt ii; 3588 for (ii=0;ii<B_neigs;ii++) { 3589 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3590 for (j=0;j<B_N;j++) { 3591 #if defined(PETSC_USE_COMPLEX) 3592 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3593 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3594 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3595 #else 3596 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3597 #endif 3598 } 3599 } 3600 } 3601 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3602 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3603 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3604 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3605 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3606 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3607 } 3608 maxneigs = PetscMax(B_neigs,maxneigs); 3609 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3610 if (B_neigs) { 3611 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3612 3613 if (pcbddc->dbg_flag > 1) { 3614 PetscInt ii; 3615 for (ii=0;ii<B_neigs;ii++) { 3616 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3617 for (j=0;j<B_N;j++) { 3618 #if defined(PETSC_USE_COMPLEX) 3619 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3620 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3621 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3622 #else 3623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3624 #endif 3625 } 3626 } 3627 } 3628 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3629 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3630 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3631 cum++; 3632 } 3633 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3634 /* shift for next computation */ 3635 cumarray += subset_size*subset_size; 3636 } 3637 if (pcbddc->dbg_flag) { 3638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3639 } 3640 3641 if (mss) { 3642 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3643 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3644 /* destroy matrices (junk) */ 3645 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3646 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3647 } 3648 if (allocated_S_St) { 3649 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3650 } 3651 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3652 #if defined(PETSC_USE_COMPLEX) 3653 ierr = PetscFree(rwork);CHKERRQ(ierr); 3654 #endif 3655 if (pcbddc->dbg_flag) { 3656 PetscInt maxneigs_r; 3657 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3658 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3659 } 3660 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3661 PetscFunctionReturn(0); 3662 } 3663 3664 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3665 { 3666 PetscScalar *coarse_submat_vals; 3667 PetscErrorCode ierr; 3668 3669 PetscFunctionBegin; 3670 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3671 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3672 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3673 3674 /* Setup local neumann solver ksp_R */ 3675 /* PCBDDCSetUpLocalScatters should be called first! */ 3676 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3677 3678 /* 3679 Setup local correction and local part of coarse basis. 3680 Gives back the dense local part of the coarse matrix in column major ordering 3681 */ 3682 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3683 3684 /* Compute total number of coarse nodes and setup coarse solver */ 3685 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3686 3687 /* free */ 3688 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3689 PetscFunctionReturn(0); 3690 } 3691 3692 PetscErrorCode PCBDDCResetCustomization(PC pc) 3693 { 3694 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3695 PetscErrorCode ierr; 3696 3697 PetscFunctionBegin; 3698 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3699 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3700 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3701 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3702 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3703 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3704 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3705 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3706 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3707 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3708 PetscFunctionReturn(0); 3709 } 3710 3711 PetscErrorCode PCBDDCResetTopography(PC pc) 3712 { 3713 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3714 PetscInt i; 3715 PetscErrorCode ierr; 3716 3717 PetscFunctionBegin; 3718 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3719 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3720 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3721 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3722 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3723 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3724 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3725 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3726 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3727 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3728 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3729 for (i=0;i<pcbddc->n_local_subs;i++) { 3730 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3731 } 3732 pcbddc->n_local_subs = 0; 3733 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3734 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3735 pcbddc->graphanalyzed = PETSC_FALSE; 3736 pcbddc->recompute_topography = PETSC_TRUE; 3737 pcbddc->corner_selected = PETSC_FALSE; 3738 PetscFunctionReturn(0); 3739 } 3740 3741 PetscErrorCode PCBDDCResetSolvers(PC pc) 3742 { 3743 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3744 PetscErrorCode ierr; 3745 3746 PetscFunctionBegin; 3747 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3748 if (pcbddc->coarse_phi_B) { 3749 PetscScalar *array; 3750 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3751 ierr = PetscFree(array);CHKERRQ(ierr); 3752 } 3753 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3754 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3755 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3756 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3757 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3758 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3759 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3760 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3761 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3762 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3763 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3764 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3765 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3766 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3767 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3768 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3769 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3770 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3771 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3772 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3773 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3774 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3775 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3776 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3777 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3778 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3779 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3780 if (pcbddc->benign_zerodiag_subs) { 3781 PetscInt i; 3782 for (i=0;i<pcbddc->benign_n;i++) { 3783 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3784 } 3785 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3786 } 3787 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3788 PetscFunctionReturn(0); 3789 } 3790 3791 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3792 { 3793 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3794 PC_IS *pcis = (PC_IS*)pc->data; 3795 VecType impVecType; 3796 PetscInt n_constraints,n_R,old_size; 3797 PetscErrorCode ierr; 3798 3799 PetscFunctionBegin; 3800 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3801 n_R = pcis->n - pcbddc->n_vertices; 3802 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3803 /* local work vectors (try to avoid unneeded work)*/ 3804 /* R nodes */ 3805 old_size = -1; 3806 if (pcbddc->vec1_R) { 3807 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3808 } 3809 if (n_R != old_size) { 3810 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3811 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3812 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3813 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3814 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3815 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3816 } 3817 /* local primal dofs */ 3818 old_size = -1; 3819 if (pcbddc->vec1_P) { 3820 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3821 } 3822 if (pcbddc->local_primal_size != old_size) { 3823 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3824 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3825 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3826 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3827 } 3828 /* local explicit constraints */ 3829 old_size = -1; 3830 if (pcbddc->vec1_C) { 3831 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3832 } 3833 if (n_constraints && n_constraints != old_size) { 3834 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3835 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3836 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3837 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3838 } 3839 PetscFunctionReturn(0); 3840 } 3841 3842 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3843 { 3844 PetscErrorCode ierr; 3845 /* pointers to pcis and pcbddc */ 3846 PC_IS* pcis = (PC_IS*)pc->data; 3847 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3848 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3849 /* submatrices of local problem */ 3850 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3851 /* submatrices of local coarse problem */ 3852 Mat S_VV,S_CV,S_VC,S_CC; 3853 /* working matrices */ 3854 Mat C_CR; 3855 /* additional working stuff */ 3856 PC pc_R; 3857 Mat F,Brhs = NULL; 3858 Vec dummy_vec; 3859 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3860 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3861 PetscScalar *work; 3862 PetscInt *idx_V_B; 3863 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3864 PetscInt i,n_R,n_D,n_B; 3865 3866 /* some shortcuts to scalars */ 3867 PetscScalar one=1.0,m_one=-1.0; 3868 3869 PetscFunctionBegin; 3870 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3871 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3872 3873 /* Set Non-overlapping dimensions */ 3874 n_vertices = pcbddc->n_vertices; 3875 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3876 n_B = pcis->n_B; 3877 n_D = pcis->n - n_B; 3878 n_R = pcis->n - n_vertices; 3879 3880 /* vertices in boundary numbering */ 3881 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3882 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3883 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3884 3885 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3886 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3887 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3888 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3889 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3890 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3891 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3892 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3893 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3894 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3895 3896 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3897 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3898 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3899 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3900 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3901 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3902 lda_rhs = n_R; 3903 need_benign_correction = PETSC_FALSE; 3904 if (isLU || isILU || isCHOL) { 3905 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3906 } else if (sub_schurs && sub_schurs->reuse_solver) { 3907 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3908 MatFactorType type; 3909 3910 F = reuse_solver->F; 3911 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3912 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3913 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3914 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3915 } else { 3916 F = NULL; 3917 } 3918 3919 /* determine if we can use a sparse right-hand side */ 3920 sparserhs = PETSC_FALSE; 3921 if (F) { 3922 MatSolverType solver; 3923 3924 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3925 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3926 } 3927 3928 /* allocate workspace */ 3929 n = 0; 3930 if (n_constraints) { 3931 n += lda_rhs*n_constraints; 3932 } 3933 if (n_vertices) { 3934 n = PetscMax(2*lda_rhs*n_vertices,n); 3935 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3936 } 3937 if (!pcbddc->symmetric_primal) { 3938 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3939 } 3940 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3941 3942 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3943 dummy_vec = NULL; 3944 if (need_benign_correction && lda_rhs != n_R && F) { 3945 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3946 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3947 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3948 } 3949 3950 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3951 if (n_constraints) { 3952 Mat M3,C_B; 3953 IS is_aux; 3954 PetscScalar *array,*array2; 3955 3956 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3957 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3958 3959 /* Extract constraints on R nodes: C_{CR} */ 3960 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3961 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3962 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3963 3964 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3965 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3966 if (!sparserhs) { 3967 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3968 for (i=0;i<n_constraints;i++) { 3969 const PetscScalar *row_cmat_values; 3970 const PetscInt *row_cmat_indices; 3971 PetscInt size_of_constraint,j; 3972 3973 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3974 for (j=0;j<size_of_constraint;j++) { 3975 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3976 } 3977 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3978 } 3979 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3980 } else { 3981 Mat tC_CR; 3982 3983 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3984 if (lda_rhs != n_R) { 3985 PetscScalar *aa; 3986 PetscInt r,*ii,*jj; 3987 PetscBool done; 3988 3989 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3990 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3991 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3992 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3993 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3994 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3995 } else { 3996 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3997 tC_CR = C_CR; 3998 } 3999 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 4000 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 4001 } 4002 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 4003 if (F) { 4004 if (need_benign_correction) { 4005 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4006 4007 /* rhs is already zero on interior dofs, no need to change the rhs */ 4008 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 4009 } 4010 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 4011 if (need_benign_correction) { 4012 PetscScalar *marr; 4013 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4014 4015 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4016 if (lda_rhs != n_R) { 4017 for (i=0;i<n_constraints;i++) { 4018 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4019 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4020 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4021 } 4022 } else { 4023 for (i=0;i<n_constraints;i++) { 4024 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4025 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4026 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4027 } 4028 } 4029 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4030 } 4031 } else { 4032 PetscScalar *marr; 4033 4034 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4035 for (i=0;i<n_constraints;i++) { 4036 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4037 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4038 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4039 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4040 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4041 } 4042 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4043 } 4044 if (sparserhs) { 4045 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4046 } 4047 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4048 if (!pcbddc->switch_static) { 4049 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4050 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4051 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4052 for (i=0;i<n_constraints;i++) { 4053 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4054 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4055 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4056 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4057 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4058 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4059 } 4060 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4061 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4062 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4063 } else { 4064 if (lda_rhs != n_R) { 4065 IS dummy; 4066 4067 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4068 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4069 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4070 } else { 4071 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4072 pcbddc->local_auxmat2 = local_auxmat2_R; 4073 } 4074 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4075 } 4076 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4077 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4078 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4079 if (isCHOL) { 4080 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4081 } else { 4082 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4083 } 4084 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4085 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4086 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4087 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4088 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4089 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4090 } 4091 4092 /* Get submatrices from subdomain matrix */ 4093 if (n_vertices) { 4094 IS is_aux; 4095 PetscBool isseqaij; 4096 4097 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4098 IS tis; 4099 4100 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4101 ierr = ISSort(tis);CHKERRQ(ierr); 4102 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4103 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4104 } else { 4105 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4106 } 4107 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4108 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4109 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4110 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4111 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4112 } 4113 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4114 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4115 } 4116 4117 /* Matrix of coarse basis functions (local) */ 4118 if (pcbddc->coarse_phi_B) { 4119 PetscInt on_B,on_primal,on_D=n_D; 4120 if (pcbddc->coarse_phi_D) { 4121 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4122 } 4123 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4124 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4125 PetscScalar *marray; 4126 4127 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4128 ierr = PetscFree(marray);CHKERRQ(ierr); 4129 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4130 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4131 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4132 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4133 } 4134 } 4135 4136 if (!pcbddc->coarse_phi_B) { 4137 PetscScalar *marr; 4138 4139 /* memory size */ 4140 n = n_B*pcbddc->local_primal_size; 4141 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4142 if (!pcbddc->symmetric_primal) n *= 2; 4143 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4144 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4145 marr += n_B*pcbddc->local_primal_size; 4146 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4147 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4148 marr += n_D*pcbddc->local_primal_size; 4149 } 4150 if (!pcbddc->symmetric_primal) { 4151 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4152 marr += n_B*pcbddc->local_primal_size; 4153 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4155 } 4156 } else { 4157 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4158 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4159 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4160 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4161 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4162 } 4163 } 4164 } 4165 4166 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4167 p0_lidx_I = NULL; 4168 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4169 const PetscInt *idxs; 4170 4171 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4172 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4173 for (i=0;i<pcbddc->benign_n;i++) { 4174 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4175 } 4176 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4177 } 4178 4179 /* vertices */ 4180 if (n_vertices) { 4181 PetscBool restoreavr = PETSC_FALSE; 4182 4183 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4184 4185 if (n_R) { 4186 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4187 PetscBLASInt B_N,B_one = 1; 4188 PetscScalar *x,*y; 4189 4190 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4191 if (need_benign_correction) { 4192 ISLocalToGlobalMapping RtoN; 4193 IS is_p0; 4194 PetscInt *idxs_p0,n; 4195 4196 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4197 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4198 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4199 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4200 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4201 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4202 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4203 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4204 } 4205 4206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4207 if (!sparserhs || need_benign_correction) { 4208 if (lda_rhs == n_R) { 4209 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4210 } else { 4211 PetscScalar *av,*array; 4212 const PetscInt *xadj,*adjncy; 4213 PetscInt n; 4214 PetscBool flg_row; 4215 4216 array = work+lda_rhs*n_vertices; 4217 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4218 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4219 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4220 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4221 for (i=0;i<n;i++) { 4222 PetscInt j; 4223 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4224 } 4225 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4226 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4227 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4228 } 4229 if (need_benign_correction) { 4230 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4231 PetscScalar *marr; 4232 4233 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4234 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4235 4236 | 0 0 0 | (V) 4237 L = | 0 0 -1 | (P-p0) 4238 | 0 0 -1 | (p0) 4239 4240 */ 4241 for (i=0;i<reuse_solver->benign_n;i++) { 4242 const PetscScalar *vals; 4243 const PetscInt *idxs,*idxs_zero; 4244 PetscInt n,j,nz; 4245 4246 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4247 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4248 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4249 for (j=0;j<n;j++) { 4250 PetscScalar val = vals[j]; 4251 PetscInt k,col = idxs[j]; 4252 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4253 } 4254 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4255 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4256 } 4257 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4258 } 4259 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4260 Brhs = A_RV; 4261 } else { 4262 Mat tA_RVT,A_RVT; 4263 4264 if (!pcbddc->symmetric_primal) { 4265 /* A_RV already scaled by -1 */ 4266 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4267 } else { 4268 restoreavr = PETSC_TRUE; 4269 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4270 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4271 A_RVT = A_VR; 4272 } 4273 if (lda_rhs != n_R) { 4274 PetscScalar *aa; 4275 PetscInt r,*ii,*jj; 4276 PetscBool done; 4277 4278 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4279 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4280 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4281 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4282 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4283 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4284 } else { 4285 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4286 tA_RVT = A_RVT; 4287 } 4288 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4289 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4290 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4291 } 4292 if (F) { 4293 /* need to correct the rhs */ 4294 if (need_benign_correction) { 4295 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4296 PetscScalar *marr; 4297 4298 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4299 if (lda_rhs != n_R) { 4300 for (i=0;i<n_vertices;i++) { 4301 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4302 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4303 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4304 } 4305 } else { 4306 for (i=0;i<n_vertices;i++) { 4307 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4308 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4309 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4310 } 4311 } 4312 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4313 } 4314 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4315 if (restoreavr) { 4316 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4317 } 4318 /* need to correct the solution */ 4319 if (need_benign_correction) { 4320 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4321 PetscScalar *marr; 4322 4323 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4324 if (lda_rhs != n_R) { 4325 for (i=0;i<n_vertices;i++) { 4326 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4327 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4328 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4329 } 4330 } else { 4331 for (i=0;i<n_vertices;i++) { 4332 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4333 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4334 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4335 } 4336 } 4337 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4338 } 4339 } else { 4340 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4341 for (i=0;i<n_vertices;i++) { 4342 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4343 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4344 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4345 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4346 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4347 } 4348 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4349 } 4350 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4351 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4352 /* S_VV and S_CV */ 4353 if (n_constraints) { 4354 Mat B; 4355 4356 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4357 for (i=0;i<n_vertices;i++) { 4358 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4359 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4360 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4361 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4362 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4363 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4364 } 4365 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4366 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4367 ierr = MatDestroy(&B);CHKERRQ(ierr); 4368 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4369 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4370 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4371 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4372 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4373 ierr = MatDestroy(&B);CHKERRQ(ierr); 4374 } 4375 if (lda_rhs != n_R) { 4376 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4377 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4378 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4379 } 4380 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4381 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4382 if (need_benign_correction) { 4383 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4384 PetscScalar *marr,*sums; 4385 4386 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4387 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4388 for (i=0;i<reuse_solver->benign_n;i++) { 4389 const PetscScalar *vals; 4390 const PetscInt *idxs,*idxs_zero; 4391 PetscInt n,j,nz; 4392 4393 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4394 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4395 for (j=0;j<n_vertices;j++) { 4396 PetscInt k; 4397 sums[j] = 0.; 4398 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4399 } 4400 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4401 for (j=0;j<n;j++) { 4402 PetscScalar val = vals[j]; 4403 PetscInt k; 4404 for (k=0;k<n_vertices;k++) { 4405 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4406 } 4407 } 4408 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4409 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4410 } 4411 ierr = PetscFree(sums);CHKERRQ(ierr); 4412 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4413 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4414 } 4415 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4416 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4417 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4418 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4419 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4420 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4421 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4422 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4423 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4424 } else { 4425 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4426 } 4427 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4428 4429 /* coarse basis functions */ 4430 for (i=0;i<n_vertices;i++) { 4431 PetscScalar *y; 4432 4433 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4434 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4435 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4436 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4437 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4438 y[n_B*i+idx_V_B[i]] = 1.0; 4439 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4440 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4441 4442 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4443 PetscInt j; 4444 4445 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4446 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4447 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4448 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4449 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4450 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4451 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4452 } 4453 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4454 } 4455 /* if n_R == 0 the object is not destroyed */ 4456 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4457 } 4458 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4459 4460 if (n_constraints) { 4461 Mat B; 4462 4463 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4464 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4465 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4466 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4467 if (n_vertices) { 4468 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4469 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4470 } else { 4471 Mat S_VCt; 4472 4473 if (lda_rhs != n_R) { 4474 ierr = MatDestroy(&B);CHKERRQ(ierr); 4475 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4476 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4477 } 4478 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4479 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4480 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4481 } 4482 } 4483 ierr = MatDestroy(&B);CHKERRQ(ierr); 4484 /* coarse basis functions */ 4485 for (i=0;i<n_constraints;i++) { 4486 PetscScalar *y; 4487 4488 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4489 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4490 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4491 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4492 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4493 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4494 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4495 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4496 PetscInt j; 4497 4498 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4499 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4500 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4501 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4502 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4503 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4504 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4505 } 4506 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4507 } 4508 } 4509 if (n_constraints) { 4510 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4511 } 4512 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4513 4514 /* coarse matrix entries relative to B_0 */ 4515 if (pcbddc->benign_n) { 4516 Mat B0_B,B0_BPHI; 4517 IS is_dummy; 4518 PetscScalar *data; 4519 PetscInt j; 4520 4521 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4522 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4523 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4524 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4525 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4526 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4527 for (j=0;j<pcbddc->benign_n;j++) { 4528 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4529 for (i=0;i<pcbddc->local_primal_size;i++) { 4530 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4531 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4532 } 4533 } 4534 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4535 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4536 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4537 } 4538 4539 /* compute other basis functions for non-symmetric problems */ 4540 if (!pcbddc->symmetric_primal) { 4541 Mat B_V=NULL,B_C=NULL; 4542 PetscScalar *marray; 4543 4544 if (n_constraints) { 4545 Mat S_CCT,C_CRT; 4546 4547 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4548 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4549 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4550 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4551 if (n_vertices) { 4552 Mat S_VCT; 4553 4554 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4555 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4556 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4557 } 4558 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4559 } else { 4560 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4561 } 4562 if (n_vertices && n_R) { 4563 PetscScalar *av,*marray; 4564 const PetscInt *xadj,*adjncy; 4565 PetscInt n; 4566 PetscBool flg_row; 4567 4568 /* B_V = B_V - A_VR^T */ 4569 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4570 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4571 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4572 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4573 for (i=0;i<n;i++) { 4574 PetscInt j; 4575 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4576 } 4577 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4578 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4579 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4580 } 4581 4582 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4583 if (n_vertices) { 4584 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4585 for (i=0;i<n_vertices;i++) { 4586 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4587 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4588 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4589 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4590 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4591 } 4592 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4593 } 4594 if (B_C) { 4595 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4596 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4597 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4598 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4599 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4600 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4601 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4602 } 4603 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4604 } 4605 /* coarse basis functions */ 4606 for (i=0;i<pcbddc->local_primal_size;i++) { 4607 PetscScalar *y; 4608 4609 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4610 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4611 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4612 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4613 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4614 if (i<n_vertices) { 4615 y[n_B*i+idx_V_B[i]] = 1.0; 4616 } 4617 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4618 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4619 4620 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4621 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4622 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4623 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4624 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4625 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4626 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4627 } 4628 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4629 } 4630 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4631 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4632 } 4633 4634 /* free memory */ 4635 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4636 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4637 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4638 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4639 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4640 ierr = PetscFree(work);CHKERRQ(ierr); 4641 if (n_vertices) { 4642 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4643 } 4644 if (n_constraints) { 4645 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4646 } 4647 /* Checking coarse_sub_mat and coarse basis functios */ 4648 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4649 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4650 if (pcbddc->dbg_flag) { 4651 Mat coarse_sub_mat; 4652 Mat AUXMAT,TM1,TM2,TM3,TM4; 4653 Mat coarse_phi_D,coarse_phi_B; 4654 Mat coarse_psi_D,coarse_psi_B; 4655 Mat A_II,A_BB,A_IB,A_BI; 4656 Mat C_B,CPHI; 4657 IS is_dummy; 4658 Vec mones; 4659 MatType checkmattype=MATSEQAIJ; 4660 PetscReal real_value; 4661 4662 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4663 Mat A; 4664 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4665 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4666 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4667 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4668 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4669 ierr = MatDestroy(&A);CHKERRQ(ierr); 4670 } else { 4671 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4672 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4673 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4674 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4675 } 4676 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4677 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4678 if (!pcbddc->symmetric_primal) { 4679 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4680 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4681 } 4682 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4683 4684 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4685 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4686 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4687 if (!pcbddc->symmetric_primal) { 4688 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4689 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4690 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4691 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4692 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4693 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4694 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4695 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4696 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4697 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4698 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4699 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4700 } else { 4701 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4702 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4703 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4704 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4705 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4706 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4707 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4708 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4709 } 4710 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4711 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4712 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4713 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4714 if (pcbddc->benign_n) { 4715 Mat B0_B,B0_BPHI; 4716 PetscScalar *data,*data2; 4717 PetscInt j; 4718 4719 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4720 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4721 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4722 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4723 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4724 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4725 for (j=0;j<pcbddc->benign_n;j++) { 4726 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4727 for (i=0;i<pcbddc->local_primal_size;i++) { 4728 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4729 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4730 } 4731 } 4732 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4733 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4734 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4735 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4736 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4737 } 4738 #if 0 4739 { 4740 PetscViewer viewer; 4741 char filename[256]; 4742 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4743 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4744 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4745 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4746 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4747 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4748 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4749 if (pcbddc->coarse_phi_B) { 4750 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4751 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4752 } 4753 if (pcbddc->coarse_phi_D) { 4754 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4755 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4756 } 4757 if (pcbddc->coarse_psi_B) { 4758 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4759 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4760 } 4761 if (pcbddc->coarse_psi_D) { 4762 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4763 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4764 } 4765 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4766 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4767 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4768 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4769 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4770 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4771 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4772 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4773 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4774 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4775 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4776 } 4777 #endif 4778 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4779 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4780 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4781 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4782 4783 /* check constraints */ 4784 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4785 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4786 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4787 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4788 } else { 4789 PetscScalar *data; 4790 Mat tmat; 4791 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4792 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4793 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4794 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4795 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4796 } 4797 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4798 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4799 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4800 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4801 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4802 if (!pcbddc->symmetric_primal) { 4803 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4804 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4805 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4806 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4807 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4808 } 4809 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4810 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4811 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4812 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4813 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4814 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4815 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4816 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4817 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4818 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4819 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4820 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4821 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4822 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4823 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4824 if (!pcbddc->symmetric_primal) { 4825 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4826 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4827 } 4828 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4829 } 4830 /* get back data */ 4831 *coarse_submat_vals_n = coarse_submat_vals; 4832 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4833 PetscFunctionReturn(0); 4834 } 4835 4836 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4837 { 4838 Mat *work_mat; 4839 IS isrow_s,iscol_s; 4840 PetscBool rsorted,csorted; 4841 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4842 PetscErrorCode ierr; 4843 4844 PetscFunctionBegin; 4845 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4846 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4847 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4848 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4849 4850 if (!rsorted) { 4851 const PetscInt *idxs; 4852 PetscInt *idxs_sorted,i; 4853 4854 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4855 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4856 for (i=0;i<rsize;i++) { 4857 idxs_perm_r[i] = i; 4858 } 4859 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4860 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4861 for (i=0;i<rsize;i++) { 4862 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4863 } 4864 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4865 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4866 } else { 4867 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4868 isrow_s = isrow; 4869 } 4870 4871 if (!csorted) { 4872 if (isrow == iscol) { 4873 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4874 iscol_s = isrow_s; 4875 } else { 4876 const PetscInt *idxs; 4877 PetscInt *idxs_sorted,i; 4878 4879 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4880 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4881 for (i=0;i<csize;i++) { 4882 idxs_perm_c[i] = i; 4883 } 4884 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4885 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4886 for (i=0;i<csize;i++) { 4887 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4888 } 4889 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4890 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4891 } 4892 } else { 4893 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4894 iscol_s = iscol; 4895 } 4896 4897 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4898 4899 if (!rsorted || !csorted) { 4900 Mat new_mat; 4901 IS is_perm_r,is_perm_c; 4902 4903 if (!rsorted) { 4904 PetscInt *idxs_r,i; 4905 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4906 for (i=0;i<rsize;i++) { 4907 idxs_r[idxs_perm_r[i]] = i; 4908 } 4909 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4910 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4911 } else { 4912 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4913 } 4914 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4915 4916 if (!csorted) { 4917 if (isrow_s == iscol_s) { 4918 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4919 is_perm_c = is_perm_r; 4920 } else { 4921 PetscInt *idxs_c,i; 4922 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4923 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4924 for (i=0;i<csize;i++) { 4925 idxs_c[idxs_perm_c[i]] = i; 4926 } 4927 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4928 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4929 } 4930 } else { 4931 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4932 } 4933 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4934 4935 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4936 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4937 work_mat[0] = new_mat; 4938 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4939 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4940 } 4941 4942 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4943 *B = work_mat[0]; 4944 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4945 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4946 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4947 PetscFunctionReturn(0); 4948 } 4949 4950 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4951 { 4952 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4953 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4954 Mat new_mat,lA; 4955 IS is_local,is_global; 4956 PetscInt local_size; 4957 PetscBool isseqaij; 4958 PetscErrorCode ierr; 4959 4960 PetscFunctionBegin; 4961 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4962 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4963 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4964 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4965 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4966 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4967 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4968 4969 /* check */ 4970 if (pcbddc->dbg_flag) { 4971 Vec x,x_change; 4972 PetscReal error; 4973 4974 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4975 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4976 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4977 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4978 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4979 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4980 if (!pcbddc->change_interior) { 4981 const PetscScalar *x,*y,*v; 4982 PetscReal lerror = 0.; 4983 PetscInt i; 4984 4985 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4986 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4987 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4988 for (i=0;i<local_size;i++) 4989 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4990 lerror = PetscAbsScalar(x[i]-y[i]); 4991 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4992 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4993 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4994 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4995 if (error > PETSC_SMALL) { 4996 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4997 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 4998 } else { 4999 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 5000 } 5001 } 5002 } 5003 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5004 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5005 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 5006 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 5007 if (error > PETSC_SMALL) { 5008 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5009 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5010 } else { 5011 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5012 } 5013 } 5014 ierr = VecDestroy(&x);CHKERRQ(ierr); 5015 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5016 } 5017 5018 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5019 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5020 5021 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5022 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5023 if (isseqaij) { 5024 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5025 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5026 if (lA) { 5027 Mat work; 5028 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5029 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5030 ierr = MatDestroy(&work);CHKERRQ(ierr); 5031 } 5032 } else { 5033 Mat work_mat; 5034 5035 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5036 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5037 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5038 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5039 if (lA) { 5040 Mat work; 5041 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5042 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5043 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5044 ierr = MatDestroy(&work);CHKERRQ(ierr); 5045 } 5046 } 5047 if (matis->A->symmetric_set) { 5048 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5049 #if !defined(PETSC_USE_COMPLEX) 5050 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5051 #endif 5052 } 5053 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5054 PetscFunctionReturn(0); 5055 } 5056 5057 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5058 { 5059 PC_IS* pcis = (PC_IS*)(pc->data); 5060 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5061 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5062 PetscInt *idx_R_local=NULL; 5063 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5064 PetscInt vbs,bs; 5065 PetscBT bitmask=NULL; 5066 PetscErrorCode ierr; 5067 5068 PetscFunctionBegin; 5069 /* 5070 No need to setup local scatters if 5071 - primal space is unchanged 5072 AND 5073 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5074 AND 5075 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5076 */ 5077 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5078 PetscFunctionReturn(0); 5079 } 5080 /* destroy old objects */ 5081 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5082 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5083 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5084 /* Set Non-overlapping dimensions */ 5085 n_B = pcis->n_B; 5086 n_D = pcis->n - n_B; 5087 n_vertices = pcbddc->n_vertices; 5088 5089 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5090 5091 /* create auxiliary bitmask and allocate workspace */ 5092 if (!sub_schurs || !sub_schurs->reuse_solver) { 5093 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5094 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5095 for (i=0;i<n_vertices;i++) { 5096 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5097 } 5098 5099 for (i=0, n_R=0; i<pcis->n; i++) { 5100 if (!PetscBTLookup(bitmask,i)) { 5101 idx_R_local[n_R++] = i; 5102 } 5103 } 5104 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5105 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5106 5107 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5108 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5109 } 5110 5111 /* Block code */ 5112 vbs = 1; 5113 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5114 if (bs>1 && !(n_vertices%bs)) { 5115 PetscBool is_blocked = PETSC_TRUE; 5116 PetscInt *vary; 5117 if (!sub_schurs || !sub_schurs->reuse_solver) { 5118 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5119 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5120 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5121 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5122 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5123 for (i=0; i<pcis->n/bs; i++) { 5124 if (vary[i]!=0 && vary[i]!=bs) { 5125 is_blocked = PETSC_FALSE; 5126 break; 5127 } 5128 } 5129 ierr = PetscFree(vary);CHKERRQ(ierr); 5130 } else { 5131 /* Verify directly the R set */ 5132 for (i=0; i<n_R/bs; i++) { 5133 PetscInt j,node=idx_R_local[bs*i]; 5134 for (j=1; j<bs; j++) { 5135 if (node != idx_R_local[bs*i+j]-j) { 5136 is_blocked = PETSC_FALSE; 5137 break; 5138 } 5139 } 5140 } 5141 } 5142 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5143 vbs = bs; 5144 for (i=0;i<n_R/vbs;i++) { 5145 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5146 } 5147 } 5148 } 5149 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5150 if (sub_schurs && sub_schurs->reuse_solver) { 5151 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5152 5153 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5154 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5155 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5156 reuse_solver->is_R = pcbddc->is_R_local; 5157 } else { 5158 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5159 } 5160 5161 /* print some info if requested */ 5162 if (pcbddc->dbg_flag) { 5163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5164 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5165 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5168 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5169 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5170 } 5171 5172 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5173 if (!sub_schurs || !sub_schurs->reuse_solver) { 5174 IS is_aux1,is_aux2; 5175 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5176 5177 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5178 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5179 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5180 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5181 for (i=0; i<n_D; i++) { 5182 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5183 } 5184 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5185 for (i=0, j=0; i<n_R; i++) { 5186 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5187 aux_array1[j++] = i; 5188 } 5189 } 5190 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5191 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5192 for (i=0, j=0; i<n_B; i++) { 5193 if (!PetscBTLookup(bitmask,is_indices[i])) { 5194 aux_array2[j++] = i; 5195 } 5196 } 5197 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5198 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5199 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5200 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5201 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5202 5203 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5204 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5205 for (i=0, j=0; i<n_R; i++) { 5206 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5207 aux_array1[j++] = i; 5208 } 5209 } 5210 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5211 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5212 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5213 } 5214 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5215 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5216 } else { 5217 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5218 IS tis; 5219 PetscInt schur_size; 5220 5221 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5222 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5223 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5224 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5225 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5226 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5227 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5228 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5229 } 5230 } 5231 PetscFunctionReturn(0); 5232 } 5233 5234 5235 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5236 { 5237 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5238 PC_IS *pcis = (PC_IS*)pc->data; 5239 PC pc_temp; 5240 Mat A_RR; 5241 MatReuse reuse; 5242 PetscScalar m_one = -1.0; 5243 PetscReal value; 5244 PetscInt n_D,n_R; 5245 PetscBool check_corr,issbaij; 5246 PetscErrorCode ierr; 5247 /* prefixes stuff */ 5248 char dir_prefix[256],neu_prefix[256],str_level[16]; 5249 size_t len; 5250 5251 PetscFunctionBegin; 5252 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5253 /* compute prefixes */ 5254 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5255 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5256 if (!pcbddc->current_level) { 5257 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5258 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5259 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5260 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5261 } else { 5262 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5263 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5264 len -= 15; /* remove "pc_bddc_coarse_" */ 5265 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5266 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5267 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5268 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5269 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5270 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5271 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5272 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5273 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5274 } 5275 5276 /* DIRICHLET PROBLEM */ 5277 if (dirichlet) { 5278 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5279 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5280 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5281 if (pcbddc->dbg_flag) { 5282 Mat A_IIn; 5283 5284 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5285 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5286 pcis->A_II = A_IIn; 5287 } 5288 } 5289 if (pcbddc->local_mat->symmetric_set) { 5290 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5291 } 5292 /* Matrix for Dirichlet problem is pcis->A_II */ 5293 n_D = pcis->n - pcis->n_B; 5294 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5295 void (*f)(void) = 0; 5296 5297 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5298 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5299 /* default */ 5300 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5301 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5302 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5303 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5304 if (issbaij) { 5305 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5306 } else { 5307 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5308 } 5309 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5310 /* Allow user's customization */ 5311 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5312 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5313 if (f && pcbddc->mat_graph->cloc) { 5314 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5315 const PetscInt *idxs; 5316 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5317 5318 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5319 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5320 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5321 for (i=0;i<nl;i++) { 5322 for (d=0;d<cdim;d++) { 5323 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5324 } 5325 } 5326 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5327 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5328 ierr = PetscFree(scoords);CHKERRQ(ierr); 5329 } 5330 } 5331 ierr = MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5332 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5333 if (sub_schurs && sub_schurs->reuse_solver) { 5334 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5335 5336 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5337 } 5338 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5339 if (!n_D) { 5340 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5341 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5342 } 5343 /* set ksp_D into pcis data */ 5344 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5345 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5346 pcis->ksp_D = pcbddc->ksp_D; 5347 } 5348 5349 /* NEUMANN PROBLEM */ 5350 A_RR = 0; 5351 if (neumann) { 5352 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5353 PetscInt ibs,mbs; 5354 PetscBool issbaij, reuse_neumann_solver; 5355 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5356 5357 reuse_neumann_solver = PETSC_FALSE; 5358 if (sub_schurs && sub_schurs->reuse_solver) { 5359 IS iP; 5360 5361 reuse_neumann_solver = PETSC_TRUE; 5362 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5363 if (iP) reuse_neumann_solver = PETSC_FALSE; 5364 } 5365 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5366 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5367 if (pcbddc->ksp_R) { /* already created ksp */ 5368 PetscInt nn_R; 5369 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5370 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5371 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5372 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5373 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5374 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5375 reuse = MAT_INITIAL_MATRIX; 5376 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5377 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5378 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5379 reuse = MAT_INITIAL_MATRIX; 5380 } else { /* safe to reuse the matrix */ 5381 reuse = MAT_REUSE_MATRIX; 5382 } 5383 } 5384 /* last check */ 5385 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5386 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5387 reuse = MAT_INITIAL_MATRIX; 5388 } 5389 } else { /* first time, so we need to create the matrix */ 5390 reuse = MAT_INITIAL_MATRIX; 5391 } 5392 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5393 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5394 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5395 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5396 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5397 if (matis->A == pcbddc->local_mat) { 5398 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5399 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5400 } else { 5401 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5402 } 5403 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5404 if (matis->A == pcbddc->local_mat) { 5405 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5406 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5407 } else { 5408 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5409 } 5410 } 5411 /* extract A_RR */ 5412 if (reuse_neumann_solver) { 5413 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5414 5415 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5416 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5417 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5418 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5419 } else { 5420 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5421 } 5422 } else { 5423 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5424 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5425 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5426 } 5427 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5428 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5429 } 5430 if (pcbddc->local_mat->symmetric_set) { 5431 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5432 } 5433 if (!pcbddc->ksp_R) { /* create object if not present */ 5434 void (*f)(void) = 0; 5435 5436 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5437 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5438 /* default */ 5439 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5440 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5441 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5442 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5443 if (issbaij) { 5444 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5445 } else { 5446 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5447 } 5448 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5449 /* Allow user's customization */ 5450 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5451 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5452 if (f && pcbddc->mat_graph->cloc) { 5453 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5454 const PetscInt *idxs; 5455 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5456 5457 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5458 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5459 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5460 for (i=0;i<nl;i++) { 5461 for (d=0;d<cdim;d++) { 5462 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5463 } 5464 } 5465 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5466 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5467 ierr = PetscFree(scoords);CHKERRQ(ierr); 5468 } 5469 } 5470 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5471 if (!n_R) { 5472 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5473 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5474 } 5475 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5476 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5477 /* Reuse solver if it is present */ 5478 if (reuse_neumann_solver) { 5479 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5480 5481 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5482 } 5483 } 5484 5485 if (pcbddc->dbg_flag) { 5486 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5487 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5488 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5489 } 5490 5491 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5492 check_corr = PETSC_FALSE; 5493 if (pcbddc->NullSpace_corr[0]) { 5494 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5495 } 5496 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5497 check_corr = PETSC_TRUE; 5498 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5499 } 5500 if (neumann && pcbddc->NullSpace_corr[2]) { 5501 check_corr = PETSC_TRUE; 5502 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5503 } 5504 /* check Dirichlet and Neumann solvers */ 5505 if (pcbddc->dbg_flag) { 5506 if (dirichlet) { /* Dirichlet */ 5507 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5508 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5509 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5510 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5511 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5513 if (check_corr) { 5514 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5515 } 5516 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5517 } 5518 if (neumann) { /* Neumann */ 5519 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5520 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5521 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5522 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5523 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5525 if (check_corr) { 5526 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5527 } 5528 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5529 } 5530 } 5531 /* free Neumann problem's matrix */ 5532 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5533 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5534 PetscFunctionReturn(0); 5535 } 5536 5537 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5538 { 5539 PetscErrorCode ierr; 5540 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5541 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5542 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5543 5544 PetscFunctionBegin; 5545 if (!reuse_solver) { 5546 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5547 } 5548 if (!pcbddc->switch_static) { 5549 if (applytranspose && pcbddc->local_auxmat1) { 5550 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5551 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5552 } 5553 if (!reuse_solver) { 5554 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5555 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5556 } else { 5557 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5558 5559 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5560 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5561 } 5562 } else { 5563 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5564 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5565 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5566 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5567 if (applytranspose && pcbddc->local_auxmat1) { 5568 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5569 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5570 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5571 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5572 } 5573 } 5574 if (!reuse_solver || pcbddc->switch_static) { 5575 if (applytranspose) { 5576 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5577 } else { 5578 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5579 } 5580 } else { 5581 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5582 5583 if (applytranspose) { 5584 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5585 } else { 5586 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5587 } 5588 } 5589 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5590 if (!pcbddc->switch_static) { 5591 if (!reuse_solver) { 5592 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5593 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5594 } else { 5595 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5596 5597 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5598 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5599 } 5600 if (!applytranspose && pcbddc->local_auxmat1) { 5601 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5602 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5603 } 5604 } else { 5605 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5606 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5607 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5608 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5609 if (!applytranspose && pcbddc->local_auxmat1) { 5610 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5611 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5612 } 5613 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5614 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5615 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5616 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5617 } 5618 PetscFunctionReturn(0); 5619 } 5620 5621 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5622 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5623 { 5624 PetscErrorCode ierr; 5625 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5626 PC_IS* pcis = (PC_IS*) (pc->data); 5627 const PetscScalar zero = 0.0; 5628 5629 PetscFunctionBegin; 5630 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5631 if (!pcbddc->benign_apply_coarse_only) { 5632 if (applytranspose) { 5633 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5634 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5635 } else { 5636 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5637 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5638 } 5639 } else { 5640 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5641 } 5642 5643 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5644 if (pcbddc->benign_n) { 5645 PetscScalar *array; 5646 PetscInt j; 5647 5648 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5649 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5650 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5651 } 5652 5653 /* start communications from local primal nodes to rhs of coarse solver */ 5654 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5655 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5656 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5657 5658 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5659 if (pcbddc->coarse_ksp) { 5660 Mat coarse_mat; 5661 Vec rhs,sol; 5662 MatNullSpace nullsp; 5663 PetscBool isbddc = PETSC_FALSE; 5664 5665 if (pcbddc->benign_have_null) { 5666 PC coarse_pc; 5667 5668 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5669 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5670 /* we need to propagate to coarser levels the need for a possible benign correction */ 5671 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5672 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5673 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5674 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5675 } 5676 } 5677 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5678 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5679 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5680 if (applytranspose) { 5681 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5682 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5683 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5684 if (nullsp) { 5685 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5686 } 5687 } else { 5688 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5689 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5690 PC coarse_pc; 5691 5692 if (nullsp) { 5693 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5694 } 5695 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5696 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5697 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5698 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5699 } else { 5700 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5701 if (nullsp) { 5702 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5703 } 5704 } 5705 } 5706 /* we don't need the benign correction at coarser levels anymore */ 5707 if (pcbddc->benign_have_null && isbddc) { 5708 PC coarse_pc; 5709 PC_BDDC* coarsepcbddc; 5710 5711 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5712 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5713 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5714 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5715 } 5716 } 5717 5718 /* Local solution on R nodes */ 5719 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5720 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5721 } 5722 /* communications from coarse sol to local primal nodes */ 5723 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5724 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5725 5726 /* Sum contributions from the two levels */ 5727 if (!pcbddc->benign_apply_coarse_only) { 5728 if (applytranspose) { 5729 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5730 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5731 } else { 5732 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5733 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5734 } 5735 /* store p0 */ 5736 if (pcbddc->benign_n) { 5737 PetscScalar *array; 5738 PetscInt j; 5739 5740 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5741 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5742 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5743 } 5744 } else { /* expand the coarse solution */ 5745 if (applytranspose) { 5746 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5747 } else { 5748 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5749 } 5750 } 5751 PetscFunctionReturn(0); 5752 } 5753 5754 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5755 { 5756 PetscErrorCode ierr; 5757 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5758 PetscScalar *array; 5759 Vec from,to; 5760 5761 PetscFunctionBegin; 5762 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5763 from = pcbddc->coarse_vec; 5764 to = pcbddc->vec1_P; 5765 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5766 Vec tvec; 5767 5768 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5769 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5770 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5771 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5772 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5773 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5774 } 5775 } else { /* from local to global -> put data in coarse right hand side */ 5776 from = pcbddc->vec1_P; 5777 to = pcbddc->coarse_vec; 5778 } 5779 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5780 PetscFunctionReturn(0); 5781 } 5782 5783 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5784 { 5785 PetscErrorCode ierr; 5786 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5787 PetscScalar *array; 5788 Vec from,to; 5789 5790 PetscFunctionBegin; 5791 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5792 from = pcbddc->coarse_vec; 5793 to = pcbddc->vec1_P; 5794 } else { /* from local to global -> put data in coarse right hand side */ 5795 from = pcbddc->vec1_P; 5796 to = pcbddc->coarse_vec; 5797 } 5798 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5799 if (smode == SCATTER_FORWARD) { 5800 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5801 Vec tvec; 5802 5803 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5804 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5805 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5806 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5807 } 5808 } else { 5809 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5810 ierr = VecResetArray(from);CHKERRQ(ierr); 5811 } 5812 } 5813 PetscFunctionReturn(0); 5814 } 5815 5816 /* uncomment for testing purposes */ 5817 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5818 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5819 { 5820 PetscErrorCode ierr; 5821 PC_IS* pcis = (PC_IS*)(pc->data); 5822 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5823 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5824 /* one and zero */ 5825 PetscScalar one=1.0,zero=0.0; 5826 /* space to store constraints and their local indices */ 5827 PetscScalar *constraints_data; 5828 PetscInt *constraints_idxs,*constraints_idxs_B; 5829 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5830 PetscInt *constraints_n; 5831 /* iterators */ 5832 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5833 /* BLAS integers */ 5834 PetscBLASInt lwork,lierr; 5835 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5836 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5837 /* reuse */ 5838 PetscInt olocal_primal_size,olocal_primal_size_cc; 5839 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5840 /* change of basis */ 5841 PetscBool qr_needed; 5842 PetscBT change_basis,qr_needed_idx; 5843 /* auxiliary stuff */ 5844 PetscInt *nnz,*is_indices; 5845 PetscInt ncc; 5846 /* some quantities */ 5847 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5848 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5849 PetscReal tol; /* tolerance for retaining eigenmodes */ 5850 5851 PetscFunctionBegin; 5852 tol = PetscSqrtReal(PETSC_SMALL); 5853 /* Destroy Mat objects computed previously */ 5854 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5855 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5856 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5857 /* save info on constraints from previous setup (if any) */ 5858 olocal_primal_size = pcbddc->local_primal_size; 5859 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5860 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5861 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5862 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5863 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5864 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5865 5866 if (!pcbddc->adaptive_selection) { 5867 IS ISForVertices,*ISForFaces,*ISForEdges; 5868 MatNullSpace nearnullsp; 5869 const Vec *nearnullvecs; 5870 Vec *localnearnullsp; 5871 PetscScalar *array; 5872 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5873 PetscBool nnsp_has_cnst; 5874 /* LAPACK working arrays for SVD or POD */ 5875 PetscBool skip_lapack,boolforchange; 5876 PetscScalar *work; 5877 PetscReal *singular_vals; 5878 #if defined(PETSC_USE_COMPLEX) 5879 PetscReal *rwork; 5880 #endif 5881 #if defined(PETSC_MISSING_LAPACK_GESVD) 5882 PetscScalar *temp_basis,*correlation_mat; 5883 #else 5884 PetscBLASInt dummy_int=1; 5885 PetscScalar dummy_scalar=1.; 5886 #endif 5887 5888 /* Get index sets for faces, edges and vertices from graph */ 5889 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5890 /* print some info */ 5891 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5892 PetscInt nv; 5893 5894 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5895 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5896 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5897 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5898 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5899 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5900 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5901 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5902 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5903 } 5904 5905 /* free unneeded index sets */ 5906 if (!pcbddc->use_vertices) { 5907 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5908 } 5909 if (!pcbddc->use_edges) { 5910 for (i=0;i<n_ISForEdges;i++) { 5911 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5912 } 5913 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5914 n_ISForEdges = 0; 5915 } 5916 if (!pcbddc->use_faces) { 5917 for (i=0;i<n_ISForFaces;i++) { 5918 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5919 } 5920 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5921 n_ISForFaces = 0; 5922 } 5923 5924 /* check if near null space is attached to global mat */ 5925 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5926 if (nearnullsp) { 5927 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5928 /* remove any stored info */ 5929 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5930 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5931 /* store information for BDDC solver reuse */ 5932 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5933 pcbddc->onearnullspace = nearnullsp; 5934 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5935 for (i=0;i<nnsp_size;i++) { 5936 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5937 } 5938 } else { /* if near null space is not provided BDDC uses constants by default */ 5939 nnsp_size = 0; 5940 nnsp_has_cnst = PETSC_TRUE; 5941 } 5942 /* get max number of constraints on a single cc */ 5943 max_constraints = nnsp_size; 5944 if (nnsp_has_cnst) max_constraints++; 5945 5946 /* 5947 Evaluate maximum storage size needed by the procedure 5948 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5949 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5950 There can be multiple constraints per connected component 5951 */ 5952 n_vertices = 0; 5953 if (ISForVertices) { 5954 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5955 } 5956 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5957 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5958 5959 total_counts = n_ISForFaces+n_ISForEdges; 5960 total_counts *= max_constraints; 5961 total_counts += n_vertices; 5962 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5963 5964 total_counts = 0; 5965 max_size_of_constraint = 0; 5966 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5967 IS used_is; 5968 if (i<n_ISForEdges) { 5969 used_is = ISForEdges[i]; 5970 } else { 5971 used_is = ISForFaces[i-n_ISForEdges]; 5972 } 5973 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5974 total_counts += j; 5975 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5976 } 5977 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5978 5979 /* get local part of global near null space vectors */ 5980 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5981 for (k=0;k<nnsp_size;k++) { 5982 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5983 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5984 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5985 } 5986 5987 /* whether or not to skip lapack calls */ 5988 skip_lapack = PETSC_TRUE; 5989 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5990 5991 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5992 if (!skip_lapack) { 5993 PetscScalar temp_work; 5994 5995 #if defined(PETSC_MISSING_LAPACK_GESVD) 5996 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5997 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5998 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5999 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 6000 #if defined(PETSC_USE_COMPLEX) 6001 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 6002 #endif 6003 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6004 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6005 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6006 lwork = -1; 6007 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6008 #if !defined(PETSC_USE_COMPLEX) 6009 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6010 #else 6011 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6012 #endif 6013 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6014 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6015 #else /* on missing GESVD */ 6016 /* SVD */ 6017 PetscInt max_n,min_n; 6018 max_n = max_size_of_constraint; 6019 min_n = max_constraints; 6020 if (max_size_of_constraint < max_constraints) { 6021 min_n = max_size_of_constraint; 6022 max_n = max_constraints; 6023 } 6024 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6025 #if defined(PETSC_USE_COMPLEX) 6026 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6027 #endif 6028 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6029 lwork = -1; 6030 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6031 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6032 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6033 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6034 #if !defined(PETSC_USE_COMPLEX) 6035 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6036 #else 6037 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6038 #endif 6039 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6040 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6041 #endif /* on missing GESVD */ 6042 /* Allocate optimal workspace */ 6043 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6044 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6045 } 6046 /* Now we can loop on constraining sets */ 6047 total_counts = 0; 6048 constraints_idxs_ptr[0] = 0; 6049 constraints_data_ptr[0] = 0; 6050 /* vertices */ 6051 if (n_vertices) { 6052 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6053 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6054 for (i=0;i<n_vertices;i++) { 6055 constraints_n[total_counts] = 1; 6056 constraints_data[total_counts] = 1.0; 6057 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6058 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6059 total_counts++; 6060 } 6061 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6062 n_vertices = total_counts; 6063 } 6064 6065 /* edges and faces */ 6066 total_counts_cc = total_counts; 6067 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6068 IS used_is; 6069 PetscBool idxs_copied = PETSC_FALSE; 6070 6071 if (ncc<n_ISForEdges) { 6072 used_is = ISForEdges[ncc]; 6073 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6074 } else { 6075 used_is = ISForFaces[ncc-n_ISForEdges]; 6076 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6077 } 6078 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6079 6080 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6081 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6082 /* change of basis should not be performed on local periodic nodes */ 6083 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6084 if (nnsp_has_cnst) { 6085 PetscScalar quad_value; 6086 6087 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6088 idxs_copied = PETSC_TRUE; 6089 6090 if (!pcbddc->use_nnsp_true) { 6091 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6092 } else { 6093 quad_value = 1.0; 6094 } 6095 for (j=0;j<size_of_constraint;j++) { 6096 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6097 } 6098 temp_constraints++; 6099 total_counts++; 6100 } 6101 for (k=0;k<nnsp_size;k++) { 6102 PetscReal real_value; 6103 PetscScalar *ptr_to_data; 6104 6105 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6106 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6107 for (j=0;j<size_of_constraint;j++) { 6108 ptr_to_data[j] = array[is_indices[j]]; 6109 } 6110 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6111 /* check if array is null on the connected component */ 6112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6113 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6114 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6115 temp_constraints++; 6116 total_counts++; 6117 if (!idxs_copied) { 6118 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6119 idxs_copied = PETSC_TRUE; 6120 } 6121 } 6122 } 6123 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6124 valid_constraints = temp_constraints; 6125 if (!pcbddc->use_nnsp_true && temp_constraints) { 6126 if (temp_constraints == 1) { /* just normalize the constraint */ 6127 PetscScalar norm,*ptr_to_data; 6128 6129 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6130 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6131 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6132 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6133 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6134 } else { /* perform SVD */ 6135 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6136 6137 #if defined(PETSC_MISSING_LAPACK_GESVD) 6138 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6139 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6140 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6141 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6142 from that computed using LAPACKgesvd 6143 -> This is due to a different computation of eigenvectors in LAPACKheev 6144 -> The quality of the POD-computed basis will be the same */ 6145 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6146 /* Store upper triangular part of correlation matrix */ 6147 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6148 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6149 for (j=0;j<temp_constraints;j++) { 6150 for (k=0;k<j+1;k++) { 6151 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6152 } 6153 } 6154 /* compute eigenvalues and eigenvectors of correlation matrix */ 6155 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6156 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6157 #if !defined(PETSC_USE_COMPLEX) 6158 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6159 #else 6160 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6161 #endif 6162 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6163 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6164 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6165 j = 0; 6166 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6167 total_counts = total_counts-j; 6168 valid_constraints = temp_constraints-j; 6169 /* scale and copy POD basis into used quadrature memory */ 6170 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6171 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6172 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6173 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6174 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6175 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6176 if (j<temp_constraints) { 6177 PetscInt ii; 6178 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6179 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6180 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6181 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6182 for (k=0;k<temp_constraints-j;k++) { 6183 for (ii=0;ii<size_of_constraint;ii++) { 6184 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6185 } 6186 } 6187 } 6188 #else /* on missing GESVD */ 6189 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6190 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6191 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6192 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6193 #if !defined(PETSC_USE_COMPLEX) 6194 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6195 #else 6196 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6197 #endif 6198 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6199 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6200 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6201 k = temp_constraints; 6202 if (k > size_of_constraint) k = size_of_constraint; 6203 j = 0; 6204 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6205 valid_constraints = k-j; 6206 total_counts = total_counts-temp_constraints+valid_constraints; 6207 #endif /* on missing GESVD */ 6208 } 6209 } 6210 /* update pointers information */ 6211 if (valid_constraints) { 6212 constraints_n[total_counts_cc] = valid_constraints; 6213 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6214 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6215 /* set change_of_basis flag */ 6216 if (boolforchange) { 6217 PetscBTSet(change_basis,total_counts_cc); 6218 } 6219 total_counts_cc++; 6220 } 6221 } 6222 /* free workspace */ 6223 if (!skip_lapack) { 6224 ierr = PetscFree(work);CHKERRQ(ierr); 6225 #if defined(PETSC_USE_COMPLEX) 6226 ierr = PetscFree(rwork);CHKERRQ(ierr); 6227 #endif 6228 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6229 #if defined(PETSC_MISSING_LAPACK_GESVD) 6230 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6231 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6232 #endif 6233 } 6234 for (k=0;k<nnsp_size;k++) { 6235 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6236 } 6237 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6238 /* free index sets of faces, edges and vertices */ 6239 for (i=0;i<n_ISForFaces;i++) { 6240 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6241 } 6242 if (n_ISForFaces) { 6243 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6244 } 6245 for (i=0;i<n_ISForEdges;i++) { 6246 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6247 } 6248 if (n_ISForEdges) { 6249 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6250 } 6251 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6252 } else { 6253 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6254 6255 total_counts = 0; 6256 n_vertices = 0; 6257 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6258 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6259 } 6260 max_constraints = 0; 6261 total_counts_cc = 0; 6262 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6263 total_counts += pcbddc->adaptive_constraints_n[i]; 6264 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6265 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6266 } 6267 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6268 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6269 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6270 constraints_data = pcbddc->adaptive_constraints_data; 6271 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6272 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6273 total_counts_cc = 0; 6274 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6275 if (pcbddc->adaptive_constraints_n[i]) { 6276 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6277 } 6278 } 6279 6280 max_size_of_constraint = 0; 6281 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6282 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6283 /* Change of basis */ 6284 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6285 if (pcbddc->use_change_of_basis) { 6286 for (i=0;i<sub_schurs->n_subs;i++) { 6287 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6288 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6289 } 6290 } 6291 } 6292 } 6293 pcbddc->local_primal_size = total_counts; 6294 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6295 6296 /* map constraints_idxs in boundary numbering */ 6297 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6298 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6299 6300 /* Create constraint matrix */ 6301 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6302 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6303 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6304 6305 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6306 /* determine if a QR strategy is needed for change of basis */ 6307 qr_needed = pcbddc->use_qr_single; 6308 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6309 total_primal_vertices=0; 6310 pcbddc->local_primal_size_cc = 0; 6311 for (i=0;i<total_counts_cc;i++) { 6312 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6313 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6314 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6315 pcbddc->local_primal_size_cc += 1; 6316 } else if (PetscBTLookup(change_basis,i)) { 6317 for (k=0;k<constraints_n[i];k++) { 6318 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6319 } 6320 pcbddc->local_primal_size_cc += constraints_n[i]; 6321 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6322 PetscBTSet(qr_needed_idx,i); 6323 qr_needed = PETSC_TRUE; 6324 } 6325 } else { 6326 pcbddc->local_primal_size_cc += 1; 6327 } 6328 } 6329 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6330 pcbddc->n_vertices = total_primal_vertices; 6331 /* permute indices in order to have a sorted set of vertices */ 6332 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6333 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6334 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6335 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6336 6337 /* nonzero structure of constraint matrix */ 6338 /* and get reference dof for local constraints */ 6339 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6340 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6341 6342 j = total_primal_vertices; 6343 total_counts = total_primal_vertices; 6344 cum = total_primal_vertices; 6345 for (i=n_vertices;i<total_counts_cc;i++) { 6346 if (!PetscBTLookup(change_basis,i)) { 6347 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6348 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6349 cum++; 6350 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6351 for (k=0;k<constraints_n[i];k++) { 6352 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6353 nnz[j+k] = size_of_constraint; 6354 } 6355 j += constraints_n[i]; 6356 } 6357 } 6358 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6359 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6360 ierr = PetscFree(nnz);CHKERRQ(ierr); 6361 6362 /* set values in constraint matrix */ 6363 for (i=0;i<total_primal_vertices;i++) { 6364 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6365 } 6366 total_counts = total_primal_vertices; 6367 for (i=n_vertices;i<total_counts_cc;i++) { 6368 if (!PetscBTLookup(change_basis,i)) { 6369 PetscInt *cols; 6370 6371 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6372 cols = constraints_idxs+constraints_idxs_ptr[i]; 6373 for (k=0;k<constraints_n[i];k++) { 6374 PetscInt row = total_counts+k; 6375 PetscScalar *vals; 6376 6377 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6378 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6379 } 6380 total_counts += constraints_n[i]; 6381 } 6382 } 6383 /* assembling */ 6384 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6385 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6386 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6387 6388 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6389 if (pcbddc->use_change_of_basis) { 6390 /* dual and primal dofs on a single cc */ 6391 PetscInt dual_dofs,primal_dofs; 6392 /* working stuff for GEQRF */ 6393 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6394 PetscBLASInt lqr_work; 6395 /* working stuff for UNGQR */ 6396 PetscScalar *gqr_work = NULL,lgqr_work_t; 6397 PetscBLASInt lgqr_work; 6398 /* working stuff for TRTRS */ 6399 PetscScalar *trs_rhs = NULL; 6400 PetscBLASInt Blas_NRHS; 6401 /* pointers for values insertion into change of basis matrix */ 6402 PetscInt *start_rows,*start_cols; 6403 PetscScalar *start_vals; 6404 /* working stuff for values insertion */ 6405 PetscBT is_primal; 6406 PetscInt *aux_primal_numbering_B; 6407 /* matrix sizes */ 6408 PetscInt global_size,local_size; 6409 /* temporary change of basis */ 6410 Mat localChangeOfBasisMatrix; 6411 /* extra space for debugging */ 6412 PetscScalar *dbg_work = NULL; 6413 6414 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6415 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6416 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6417 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6418 /* nonzeros for local mat */ 6419 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6420 if (!pcbddc->benign_change || pcbddc->fake_change) { 6421 for (i=0;i<pcis->n;i++) nnz[i]=1; 6422 } else { 6423 const PetscInt *ii; 6424 PetscInt n; 6425 PetscBool flg_row; 6426 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6427 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6428 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6429 } 6430 for (i=n_vertices;i<total_counts_cc;i++) { 6431 if (PetscBTLookup(change_basis,i)) { 6432 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6433 if (PetscBTLookup(qr_needed_idx,i)) { 6434 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6435 } else { 6436 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6437 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6438 } 6439 } 6440 } 6441 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6442 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6443 ierr = PetscFree(nnz);CHKERRQ(ierr); 6444 /* Set interior change in the matrix */ 6445 if (!pcbddc->benign_change || pcbddc->fake_change) { 6446 for (i=0;i<pcis->n;i++) { 6447 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6448 } 6449 } else { 6450 const PetscInt *ii,*jj; 6451 PetscScalar *aa; 6452 PetscInt n; 6453 PetscBool flg_row; 6454 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6455 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6456 for (i=0;i<n;i++) { 6457 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6458 } 6459 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6460 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6461 } 6462 6463 if (pcbddc->dbg_flag) { 6464 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6465 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6466 } 6467 6468 6469 /* Now we loop on the constraints which need a change of basis */ 6470 /* 6471 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6472 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6473 6474 Basic blocks of change of basis matrix T computed by 6475 6476 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6477 6478 | 1 0 ... 0 s_1/S | 6479 | 0 1 ... 0 s_2/S | 6480 | ... | 6481 | 0 ... 1 s_{n-1}/S | 6482 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6483 6484 with S = \sum_{i=1}^n s_i^2 6485 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6486 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6487 6488 - QR decomposition of constraints otherwise 6489 */ 6490 if (qr_needed && max_size_of_constraint) { 6491 /* space to store Q */ 6492 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6493 /* array to store scaling factors for reflectors */ 6494 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6495 /* first we issue queries for optimal work */ 6496 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6497 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6498 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6499 lqr_work = -1; 6500 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6501 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6502 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6503 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6504 lgqr_work = -1; 6505 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6506 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6507 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6508 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6509 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6510 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6511 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6512 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6513 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6514 /* array to store rhs and solution of triangular solver */ 6515 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6516 /* allocating workspace for check */ 6517 if (pcbddc->dbg_flag) { 6518 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6519 } 6520 } 6521 /* array to store whether a node is primal or not */ 6522 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6523 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6524 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6525 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6526 for (i=0;i<total_primal_vertices;i++) { 6527 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6528 } 6529 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6530 6531 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6532 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6533 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6534 if (PetscBTLookup(change_basis,total_counts)) { 6535 /* get constraint info */ 6536 primal_dofs = constraints_n[total_counts]; 6537 dual_dofs = size_of_constraint-primal_dofs; 6538 6539 if (pcbddc->dbg_flag) { 6540 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6541 } 6542 6543 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6544 6545 /* copy quadrature constraints for change of basis check */ 6546 if (pcbddc->dbg_flag) { 6547 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6548 } 6549 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6550 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6551 6552 /* compute QR decomposition of constraints */ 6553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6554 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6556 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6557 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6558 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6559 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6560 6561 /* explictly compute R^-T */ 6562 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6563 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6564 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6565 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6566 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6567 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6568 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6569 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6570 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6571 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6572 6573 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6574 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6575 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6576 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6578 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6579 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6580 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6581 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6582 6583 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6584 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6585 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6586 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6587 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6588 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6589 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6590 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6591 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6592 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6593 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6594 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6595 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6596 6597 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6598 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6599 /* insert cols for primal dofs */ 6600 for (j=0;j<primal_dofs;j++) { 6601 start_vals = &qr_basis[j*size_of_constraint]; 6602 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6603 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6604 } 6605 /* insert cols for dual dofs */ 6606 for (j=0,k=0;j<dual_dofs;k++) { 6607 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6608 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6609 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6610 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6611 j++; 6612 } 6613 } 6614 6615 /* check change of basis */ 6616 if (pcbddc->dbg_flag) { 6617 PetscInt ii,jj; 6618 PetscBool valid_qr=PETSC_TRUE; 6619 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6620 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6621 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6622 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6623 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6624 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6625 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6626 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6627 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6628 for (jj=0;jj<size_of_constraint;jj++) { 6629 for (ii=0;ii<primal_dofs;ii++) { 6630 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6631 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6632 } 6633 } 6634 if (!valid_qr) { 6635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6636 for (jj=0;jj<size_of_constraint;jj++) { 6637 for (ii=0;ii<primal_dofs;ii++) { 6638 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6640 } 6641 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6642 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6643 } 6644 } 6645 } 6646 } else { 6647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6648 } 6649 } 6650 } else { /* simple transformation block */ 6651 PetscInt row,col; 6652 PetscScalar val,norm; 6653 6654 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6655 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6656 for (j=0;j<size_of_constraint;j++) { 6657 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6658 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6659 if (!PetscBTLookup(is_primal,row_B)) { 6660 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6661 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6662 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6663 } else { 6664 for (k=0;k<size_of_constraint;k++) { 6665 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6666 if (row != col) { 6667 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6668 } else { 6669 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6670 } 6671 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6672 } 6673 } 6674 } 6675 if (pcbddc->dbg_flag) { 6676 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6677 } 6678 } 6679 } else { 6680 if (pcbddc->dbg_flag) { 6681 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6682 } 6683 } 6684 } 6685 6686 /* free workspace */ 6687 if (qr_needed) { 6688 if (pcbddc->dbg_flag) { 6689 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6690 } 6691 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6692 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6693 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6694 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6695 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6696 } 6697 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6698 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6699 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6700 6701 /* assembling of global change of variable */ 6702 if (!pcbddc->fake_change) { 6703 Mat tmat; 6704 PetscInt bs; 6705 6706 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6707 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6708 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6709 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6710 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6711 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6712 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6713 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6714 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6715 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6716 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6717 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6718 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6719 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6720 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6721 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6722 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6723 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6724 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6725 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6726 6727 /* check */ 6728 if (pcbddc->dbg_flag) { 6729 PetscReal error; 6730 Vec x,x_change; 6731 6732 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6733 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6734 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6735 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6736 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6737 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6738 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6739 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6740 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6741 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6742 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6743 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6744 if (error > PETSC_SMALL) { 6745 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6746 } 6747 ierr = VecDestroy(&x);CHKERRQ(ierr); 6748 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6749 } 6750 /* adapt sub_schurs computed (if any) */ 6751 if (pcbddc->use_deluxe_scaling) { 6752 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6753 6754 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6755 if (sub_schurs && sub_schurs->S_Ej_all) { 6756 Mat S_new,tmat; 6757 IS is_all_N,is_V_Sall = NULL; 6758 6759 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6760 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6761 if (pcbddc->deluxe_zerorows) { 6762 ISLocalToGlobalMapping NtoSall; 6763 IS is_V; 6764 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6765 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6766 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6767 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6768 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6769 } 6770 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6771 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6772 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6773 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6774 if (pcbddc->deluxe_zerorows) { 6775 const PetscScalar *array; 6776 const PetscInt *idxs_V,*idxs_all; 6777 PetscInt i,n_V; 6778 6779 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6780 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6781 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6782 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6783 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6784 for (i=0;i<n_V;i++) { 6785 PetscScalar val; 6786 PetscInt idx; 6787 6788 idx = idxs_V[i]; 6789 val = array[idxs_all[idxs_V[i]]]; 6790 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6791 } 6792 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6793 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6794 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6795 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6796 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6797 } 6798 sub_schurs->S_Ej_all = S_new; 6799 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6800 if (sub_schurs->sum_S_Ej_all) { 6801 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6802 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6803 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6804 if (pcbddc->deluxe_zerorows) { 6805 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6806 } 6807 sub_schurs->sum_S_Ej_all = S_new; 6808 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6809 } 6810 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6811 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6812 } 6813 /* destroy any change of basis context in sub_schurs */ 6814 if (sub_schurs && sub_schurs->change) { 6815 PetscInt i; 6816 6817 for (i=0;i<sub_schurs->n_subs;i++) { 6818 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6819 } 6820 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6821 } 6822 } 6823 if (pcbddc->switch_static) { /* need to save the local change */ 6824 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6825 } else { 6826 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6827 } 6828 /* determine if any process has changed the pressures locally */ 6829 pcbddc->change_interior = pcbddc->benign_have_null; 6830 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6831 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6832 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6833 pcbddc->use_qr_single = qr_needed; 6834 } 6835 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6836 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6837 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6838 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6839 } else { 6840 Mat benign_global = NULL; 6841 if (pcbddc->benign_have_null) { 6842 Mat M; 6843 6844 pcbddc->change_interior = PETSC_TRUE; 6845 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 6846 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 6847 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 6848 if (pcbddc->benign_change) { 6849 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6850 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6851 } else { 6852 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 6853 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 6854 } 6855 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 6856 ierr = MatDestroy(&M);CHKERRQ(ierr); 6857 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6858 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6859 } 6860 if (pcbddc->user_ChangeOfBasisMatrix) { 6861 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6862 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6863 } else if (pcbddc->benign_have_null) { 6864 pcbddc->ChangeOfBasisMatrix = benign_global; 6865 } 6866 } 6867 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6868 IS is_global; 6869 const PetscInt *gidxs; 6870 6871 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6872 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6873 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6874 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6875 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6876 } 6877 } 6878 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6879 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6880 } 6881 6882 if (!pcbddc->fake_change) { 6883 /* add pressure dofs to set of primal nodes for numbering purposes */ 6884 for (i=0;i<pcbddc->benign_n;i++) { 6885 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6886 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6887 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6888 pcbddc->local_primal_size_cc++; 6889 pcbddc->local_primal_size++; 6890 } 6891 6892 /* check if a new primal space has been introduced (also take into account benign trick) */ 6893 pcbddc->new_primal_space_local = PETSC_TRUE; 6894 if (olocal_primal_size == pcbddc->local_primal_size) { 6895 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6896 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6897 if (!pcbddc->new_primal_space_local) { 6898 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6899 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6900 } 6901 } 6902 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6903 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6904 } 6905 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6906 6907 /* flush dbg viewer */ 6908 if (pcbddc->dbg_flag) { 6909 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6910 } 6911 6912 /* free workspace */ 6913 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6914 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6915 if (!pcbddc->adaptive_selection) { 6916 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6917 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6918 } else { 6919 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6920 pcbddc->adaptive_constraints_idxs_ptr, 6921 pcbddc->adaptive_constraints_data_ptr, 6922 pcbddc->adaptive_constraints_idxs, 6923 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6924 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6925 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6926 } 6927 PetscFunctionReturn(0); 6928 } 6929 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6930 6931 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6932 { 6933 ISLocalToGlobalMapping map; 6934 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6935 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6936 PetscInt i,N; 6937 PetscBool rcsr = PETSC_FALSE; 6938 PetscErrorCode ierr; 6939 6940 PetscFunctionBegin; 6941 if (pcbddc->recompute_topography) { 6942 pcbddc->graphanalyzed = PETSC_FALSE; 6943 /* Reset previously computed graph */ 6944 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6945 /* Init local Graph struct */ 6946 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6947 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6948 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6949 6950 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6951 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6952 } 6953 /* Check validity of the csr graph passed in by the user */ 6954 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6955 6956 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6957 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6958 PetscInt *xadj,*adjncy; 6959 PetscInt nvtxs; 6960 PetscBool flg_row=PETSC_FALSE; 6961 6962 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6963 if (flg_row) { 6964 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6965 pcbddc->computed_rowadj = PETSC_TRUE; 6966 } 6967 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6968 rcsr = PETSC_TRUE; 6969 } 6970 if (pcbddc->dbg_flag) { 6971 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6972 } 6973 6974 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6975 PetscReal *lcoords; 6976 PetscInt n; 6977 MPI_Datatype dimrealtype; 6978 6979 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6980 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6981 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6982 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6983 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6984 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6985 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6986 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6987 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6988 6989 pcbddc->mat_graph->coords = lcoords; 6990 pcbddc->mat_graph->cloc = PETSC_TRUE; 6991 pcbddc->mat_graph->cnloc = n; 6992 } 6993 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6994 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6995 6996 /* Setup of Graph */ 6997 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6998 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6999 7000 /* attach info on disconnected subdomains if present */ 7001 if (pcbddc->n_local_subs) { 7002 PetscInt *local_subs; 7003 7004 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 7005 for (i=0;i<pcbddc->n_local_subs;i++) { 7006 const PetscInt *idxs; 7007 PetscInt nl,j; 7008 7009 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7010 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7011 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7012 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7013 } 7014 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 7015 pcbddc->mat_graph->local_subs = local_subs; 7016 } 7017 } 7018 7019 if (!pcbddc->graphanalyzed) { 7020 /* Graph's connected components analysis */ 7021 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7022 pcbddc->graphanalyzed = PETSC_TRUE; 7023 } 7024 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7025 PetscFunctionReturn(0); 7026 } 7027 7028 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 7029 { 7030 PetscInt i,j; 7031 PetscScalar *alphas; 7032 PetscErrorCode ierr; 7033 7034 PetscFunctionBegin; 7035 if (!n) PetscFunctionReturn(0); 7036 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 7037 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 7038 for (i=1;i<n;i++) { 7039 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7040 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7041 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7042 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 7043 } 7044 ierr = PetscFree(alphas);CHKERRQ(ierr); 7045 PetscFunctionReturn(0); 7046 } 7047 7048 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7049 { 7050 Mat A; 7051 PetscInt n_neighs,*neighs,*n_shared,**shared; 7052 PetscMPIInt size,rank,color; 7053 PetscInt *xadj,*adjncy; 7054 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7055 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7056 PetscInt void_procs,*procs_candidates = NULL; 7057 PetscInt xadj_count,*count; 7058 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7059 PetscSubcomm psubcomm; 7060 MPI_Comm subcomm; 7061 PetscErrorCode ierr; 7062 7063 PetscFunctionBegin; 7064 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7065 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7066 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7067 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7068 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7069 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7070 7071 if (have_void) *have_void = PETSC_FALSE; 7072 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7073 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7074 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7075 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7076 im_active = !!n; 7077 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7078 void_procs = size - active_procs; 7079 /* get ranks of of non-active processes in mat communicator */ 7080 if (void_procs) { 7081 PetscInt ncand; 7082 7083 if (have_void) *have_void = PETSC_TRUE; 7084 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7085 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7086 for (i=0,ncand=0;i<size;i++) { 7087 if (!procs_candidates[i]) { 7088 procs_candidates[ncand++] = i; 7089 } 7090 } 7091 /* force n_subdomains to be not greater that the number of non-active processes */ 7092 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7093 } 7094 7095 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7096 number of subdomains requested 1 -> send to master or first candidate in voids */ 7097 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7098 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7099 PetscInt issize,isidx,dest; 7100 if (*n_subdomains == 1) dest = 0; 7101 else dest = rank; 7102 if (im_active) { 7103 issize = 1; 7104 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7105 isidx = procs_candidates[dest]; 7106 } else { 7107 isidx = dest; 7108 } 7109 } else { 7110 issize = 0; 7111 isidx = -1; 7112 } 7113 if (*n_subdomains != 1) *n_subdomains = active_procs; 7114 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7115 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7116 PetscFunctionReturn(0); 7117 } 7118 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7119 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7120 threshold = PetscMax(threshold,2); 7121 7122 /* Get info on mapping */ 7123 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7124 7125 /* build local CSR graph of subdomains' connectivity */ 7126 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7127 xadj[0] = 0; 7128 xadj[1] = PetscMax(n_neighs-1,0); 7129 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7130 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7131 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7132 for (i=1;i<n_neighs;i++) 7133 for (j=0;j<n_shared[i];j++) 7134 count[shared[i][j]] += 1; 7135 7136 xadj_count = 0; 7137 for (i=1;i<n_neighs;i++) { 7138 for (j=0;j<n_shared[i];j++) { 7139 if (count[shared[i][j]] < threshold) { 7140 adjncy[xadj_count] = neighs[i]; 7141 adjncy_wgt[xadj_count] = n_shared[i]; 7142 xadj_count++; 7143 break; 7144 } 7145 } 7146 } 7147 xadj[1] = xadj_count; 7148 ierr = PetscFree(count);CHKERRQ(ierr); 7149 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7150 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7151 7152 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7153 7154 /* Restrict work on active processes only */ 7155 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7156 if (void_procs) { 7157 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7158 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7159 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7160 subcomm = PetscSubcommChild(psubcomm); 7161 } else { 7162 psubcomm = NULL; 7163 subcomm = PetscObjectComm((PetscObject)mat); 7164 } 7165 7166 v_wgt = NULL; 7167 if (!color) { 7168 ierr = PetscFree(xadj);CHKERRQ(ierr); 7169 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7170 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7171 } else { 7172 Mat subdomain_adj; 7173 IS new_ranks,new_ranks_contig; 7174 MatPartitioning partitioner; 7175 PetscInt rstart=0,rend=0; 7176 PetscInt *is_indices,*oldranks; 7177 PetscMPIInt size; 7178 PetscBool aggregate; 7179 7180 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7181 if (void_procs) { 7182 PetscInt prank = rank; 7183 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7184 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7185 for (i=0;i<xadj[1];i++) { 7186 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7187 } 7188 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7189 } else { 7190 oldranks = NULL; 7191 } 7192 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7193 if (aggregate) { /* TODO: all this part could be made more efficient */ 7194 PetscInt lrows,row,ncols,*cols; 7195 PetscMPIInt nrank; 7196 PetscScalar *vals; 7197 7198 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7199 lrows = 0; 7200 if (nrank<redprocs) { 7201 lrows = size/redprocs; 7202 if (nrank<size%redprocs) lrows++; 7203 } 7204 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7205 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7206 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7207 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7208 row = nrank; 7209 ncols = xadj[1]-xadj[0]; 7210 cols = adjncy; 7211 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7212 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7213 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7214 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7215 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7216 ierr = PetscFree(xadj);CHKERRQ(ierr); 7217 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7218 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7219 ierr = PetscFree(vals);CHKERRQ(ierr); 7220 if (use_vwgt) { 7221 Vec v; 7222 const PetscScalar *array; 7223 PetscInt nl; 7224 7225 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7226 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7227 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7228 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7229 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7230 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7231 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7232 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7233 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7234 ierr = VecDestroy(&v);CHKERRQ(ierr); 7235 } 7236 } else { 7237 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7238 if (use_vwgt) { 7239 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7240 v_wgt[0] = n; 7241 } 7242 } 7243 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7244 7245 /* Partition */ 7246 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7247 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7248 if (v_wgt) { 7249 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7250 } 7251 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7252 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7253 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7254 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7255 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7256 7257 /* renumber new_ranks to avoid "holes" in new set of processors */ 7258 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7259 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7260 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7261 if (!aggregate) { 7262 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7263 #if defined(PETSC_USE_DEBUG) 7264 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7265 #endif 7266 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7267 } else if (oldranks) { 7268 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7269 } else { 7270 ranks_send_to_idx[0] = is_indices[0]; 7271 } 7272 } else { 7273 PetscInt idx = 0; 7274 PetscMPIInt tag; 7275 MPI_Request *reqs; 7276 7277 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7278 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7279 for (i=rstart;i<rend;i++) { 7280 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7281 } 7282 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7283 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7284 ierr = PetscFree(reqs);CHKERRQ(ierr); 7285 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7286 #if defined(PETSC_USE_DEBUG) 7287 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7288 #endif 7289 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7290 } else if (oldranks) { 7291 ranks_send_to_idx[0] = oldranks[idx]; 7292 } else { 7293 ranks_send_to_idx[0] = idx; 7294 } 7295 } 7296 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7297 /* clean up */ 7298 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7299 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7300 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7301 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7302 } 7303 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7304 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7305 7306 /* assemble parallel IS for sends */ 7307 i = 1; 7308 if (!color) i=0; 7309 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7310 PetscFunctionReturn(0); 7311 } 7312 7313 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7314 7315 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7316 { 7317 Mat local_mat; 7318 IS is_sends_internal; 7319 PetscInt rows,cols,new_local_rows; 7320 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7321 PetscBool ismatis,isdense,newisdense,destroy_mat; 7322 ISLocalToGlobalMapping l2gmap; 7323 PetscInt* l2gmap_indices; 7324 const PetscInt* is_indices; 7325 MatType new_local_type; 7326 /* buffers */ 7327 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7328 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7329 PetscInt *recv_buffer_idxs_local; 7330 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7331 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7332 /* MPI */ 7333 MPI_Comm comm,comm_n; 7334 PetscSubcomm subcomm; 7335 PetscMPIInt n_sends,n_recvs,size; 7336 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7337 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7338 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7339 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7340 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7341 PetscErrorCode ierr; 7342 7343 PetscFunctionBegin; 7344 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7345 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7346 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7347 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7348 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7349 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7350 PetscValidLogicalCollectiveBool(mat,reuse,6); 7351 PetscValidLogicalCollectiveInt(mat,nis,8); 7352 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7353 if (nvecs) { 7354 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7355 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7356 } 7357 /* further checks */ 7358 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7359 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7360 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7361 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7362 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7363 if (reuse && *mat_n) { 7364 PetscInt mrows,mcols,mnrows,mncols; 7365 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7366 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7367 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7368 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7369 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7370 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7371 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7372 } 7373 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7374 PetscValidLogicalCollectiveInt(mat,bs,0); 7375 7376 /* prepare IS for sending if not provided */ 7377 if (!is_sends) { 7378 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7379 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7380 } else { 7381 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7382 is_sends_internal = is_sends; 7383 } 7384 7385 /* get comm */ 7386 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7387 7388 /* compute number of sends */ 7389 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7390 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7391 7392 /* compute number of receives */ 7393 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7394 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7395 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 7396 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7397 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7398 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7399 ierr = PetscFree(iflags);CHKERRQ(ierr); 7400 7401 /* restrict comm if requested */ 7402 subcomm = 0; 7403 destroy_mat = PETSC_FALSE; 7404 if (restrict_comm) { 7405 PetscMPIInt color,subcommsize; 7406 7407 color = 0; 7408 if (restrict_full) { 7409 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7410 } else { 7411 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7412 } 7413 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7414 subcommsize = size - subcommsize; 7415 /* check if reuse has been requested */ 7416 if (reuse) { 7417 if (*mat_n) { 7418 PetscMPIInt subcommsize2; 7419 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7420 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7421 comm_n = PetscObjectComm((PetscObject)*mat_n); 7422 } else { 7423 comm_n = PETSC_COMM_SELF; 7424 } 7425 } else { /* MAT_INITIAL_MATRIX */ 7426 PetscMPIInt rank; 7427 7428 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7429 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7430 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7431 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7432 comm_n = PetscSubcommChild(subcomm); 7433 } 7434 /* flag to destroy *mat_n if not significative */ 7435 if (color) destroy_mat = PETSC_TRUE; 7436 } else { 7437 comm_n = comm; 7438 } 7439 7440 /* prepare send/receive buffers */ 7441 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7442 ierr = PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7443 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7444 ierr = PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));CHKERRQ(ierr); 7445 if (nis) { 7446 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7447 } 7448 7449 /* Get data from local matrices */ 7450 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7451 /* TODO: See below some guidelines on how to prepare the local buffers */ 7452 /* 7453 send_buffer_vals should contain the raw values of the local matrix 7454 send_buffer_idxs should contain: 7455 - MatType_PRIVATE type 7456 - PetscInt size_of_l2gmap 7457 - PetscInt global_row_indices[size_of_l2gmap] 7458 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7459 */ 7460 else { 7461 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7462 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7463 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7464 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7465 send_buffer_idxs[1] = i; 7466 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7467 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7468 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7469 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7470 for (i=0;i<n_sends;i++) { 7471 ilengths_vals[is_indices[i]] = len*len; 7472 ilengths_idxs[is_indices[i]] = len+2; 7473 } 7474 } 7475 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7476 /* additional is (if any) */ 7477 if (nis) { 7478 PetscMPIInt psum; 7479 PetscInt j; 7480 for (j=0,psum=0;j<nis;j++) { 7481 PetscInt plen; 7482 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7483 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7484 psum += len+1; /* indices + lenght */ 7485 } 7486 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7487 for (j=0,psum=0;j<nis;j++) { 7488 PetscInt plen; 7489 const PetscInt *is_array_idxs; 7490 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7491 send_buffer_idxs_is[psum] = plen; 7492 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7493 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7494 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7495 psum += plen+1; /* indices + lenght */ 7496 } 7497 for (i=0;i<n_sends;i++) { 7498 ilengths_idxs_is[is_indices[i]] = psum; 7499 } 7500 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7501 } 7502 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7503 7504 buf_size_idxs = 0; 7505 buf_size_vals = 0; 7506 buf_size_idxs_is = 0; 7507 buf_size_vecs = 0; 7508 for (i=0;i<n_recvs;i++) { 7509 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7510 buf_size_vals += (PetscInt)olengths_vals[i]; 7511 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7512 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7513 } 7514 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7515 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7516 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7517 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7518 7519 /* get new tags for clean communications */ 7520 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7521 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7522 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7523 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7524 7525 /* allocate for requests */ 7526 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7527 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7528 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7529 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7530 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7531 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7532 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7533 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7534 7535 /* communications */ 7536 ptr_idxs = recv_buffer_idxs; 7537 ptr_vals = recv_buffer_vals; 7538 ptr_idxs_is = recv_buffer_idxs_is; 7539 ptr_vecs = recv_buffer_vecs; 7540 for (i=0;i<n_recvs;i++) { 7541 source_dest = onodes[i]; 7542 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7543 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7544 ptr_idxs += olengths_idxs[i]; 7545 ptr_vals += olengths_vals[i]; 7546 if (nis) { 7547 source_dest = onodes_is[i]; 7548 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7549 ptr_idxs_is += olengths_idxs_is[i]; 7550 } 7551 if (nvecs) { 7552 source_dest = onodes[i]; 7553 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7554 ptr_vecs += olengths_idxs[i]-2; 7555 } 7556 } 7557 for (i=0;i<n_sends;i++) { 7558 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7559 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7560 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7561 if (nis) { 7562 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7563 } 7564 if (nvecs) { 7565 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7566 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7567 } 7568 } 7569 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7570 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7571 7572 /* assemble new l2g map */ 7573 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7574 ptr_idxs = recv_buffer_idxs; 7575 new_local_rows = 0; 7576 for (i=0;i<n_recvs;i++) { 7577 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7578 ptr_idxs += olengths_idxs[i]; 7579 } 7580 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7581 ptr_idxs = recv_buffer_idxs; 7582 new_local_rows = 0; 7583 for (i=0;i<n_recvs;i++) { 7584 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7585 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7586 ptr_idxs += olengths_idxs[i]; 7587 } 7588 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7589 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7590 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7591 7592 /* infer new local matrix type from received local matrices type */ 7593 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7594 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7595 if (n_recvs) { 7596 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7597 ptr_idxs = recv_buffer_idxs; 7598 for (i=0;i<n_recvs;i++) { 7599 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7600 new_local_type_private = MATAIJ_PRIVATE; 7601 break; 7602 } 7603 ptr_idxs += olengths_idxs[i]; 7604 } 7605 switch (new_local_type_private) { 7606 case MATDENSE_PRIVATE: 7607 new_local_type = MATSEQAIJ; 7608 bs = 1; 7609 break; 7610 case MATAIJ_PRIVATE: 7611 new_local_type = MATSEQAIJ; 7612 bs = 1; 7613 break; 7614 case MATBAIJ_PRIVATE: 7615 new_local_type = MATSEQBAIJ; 7616 break; 7617 case MATSBAIJ_PRIVATE: 7618 new_local_type = MATSEQSBAIJ; 7619 break; 7620 default: 7621 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7622 break; 7623 } 7624 } else { /* by default, new_local_type is seqaij */ 7625 new_local_type = MATSEQAIJ; 7626 bs = 1; 7627 } 7628 7629 /* create MATIS object if needed */ 7630 if (!reuse) { 7631 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7632 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7633 } else { 7634 /* it also destroys the local matrices */ 7635 if (*mat_n) { 7636 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7637 } else { /* this is a fake object */ 7638 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7639 } 7640 } 7641 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7642 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7643 7644 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7645 7646 /* Global to local map of received indices */ 7647 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7648 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7649 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7650 7651 /* restore attributes -> type of incoming data and its size */ 7652 buf_size_idxs = 0; 7653 for (i=0;i<n_recvs;i++) { 7654 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7655 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7656 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7657 } 7658 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7659 7660 /* set preallocation */ 7661 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7662 if (!newisdense) { 7663 PetscInt *new_local_nnz=0; 7664 7665 ptr_idxs = recv_buffer_idxs_local; 7666 if (n_recvs) { 7667 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7668 } 7669 for (i=0;i<n_recvs;i++) { 7670 PetscInt j; 7671 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7672 for (j=0;j<*(ptr_idxs+1);j++) { 7673 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7674 } 7675 } else { 7676 /* TODO */ 7677 } 7678 ptr_idxs += olengths_idxs[i]; 7679 } 7680 if (new_local_nnz) { 7681 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7682 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7683 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7684 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7685 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7686 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7687 } else { 7688 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7689 } 7690 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7691 } else { 7692 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7693 } 7694 7695 /* set values */ 7696 ptr_vals = recv_buffer_vals; 7697 ptr_idxs = recv_buffer_idxs_local; 7698 for (i=0;i<n_recvs;i++) { 7699 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7700 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7701 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7702 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7703 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7704 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7705 } else { 7706 /* TODO */ 7707 } 7708 ptr_idxs += olengths_idxs[i]; 7709 ptr_vals += olengths_vals[i]; 7710 } 7711 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7712 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7713 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7714 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7715 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7716 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7717 7718 #if 0 7719 if (!restrict_comm) { /* check */ 7720 Vec lvec,rvec; 7721 PetscReal infty_error; 7722 7723 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7724 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7725 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7726 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7727 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7728 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7729 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7730 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7731 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7732 } 7733 #endif 7734 7735 /* assemble new additional is (if any) */ 7736 if (nis) { 7737 PetscInt **temp_idxs,*count_is,j,psum; 7738 7739 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7740 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7741 ptr_idxs = recv_buffer_idxs_is; 7742 psum = 0; 7743 for (i=0;i<n_recvs;i++) { 7744 for (j=0;j<nis;j++) { 7745 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7746 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7747 psum += plen; 7748 ptr_idxs += plen+1; /* shift pointer to received data */ 7749 } 7750 } 7751 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7752 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7753 for (i=1;i<nis;i++) { 7754 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7755 } 7756 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7757 ptr_idxs = recv_buffer_idxs_is; 7758 for (i=0;i<n_recvs;i++) { 7759 for (j=0;j<nis;j++) { 7760 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7761 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7762 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7763 ptr_idxs += plen+1; /* shift pointer to received data */ 7764 } 7765 } 7766 for (i=0;i<nis;i++) { 7767 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7768 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7769 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7770 } 7771 ierr = PetscFree(count_is);CHKERRQ(ierr); 7772 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7773 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7774 } 7775 /* free workspace */ 7776 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7777 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7778 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7779 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7780 if (isdense) { 7781 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7782 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7783 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7784 } else { 7785 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7786 } 7787 if (nis) { 7788 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7789 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7790 } 7791 7792 if (nvecs) { 7793 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7794 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7795 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7796 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7797 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7798 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7799 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7800 /* set values */ 7801 ptr_vals = recv_buffer_vecs; 7802 ptr_idxs = recv_buffer_idxs_local; 7803 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7804 for (i=0;i<n_recvs;i++) { 7805 PetscInt j; 7806 for (j=0;j<*(ptr_idxs+1);j++) { 7807 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7808 } 7809 ptr_idxs += olengths_idxs[i]; 7810 ptr_vals += olengths_idxs[i]-2; 7811 } 7812 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7813 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7814 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7815 } 7816 7817 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7818 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7819 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7820 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7821 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7822 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7823 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7824 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7825 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7826 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7827 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7828 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7829 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7830 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7831 ierr = PetscFree(onodes);CHKERRQ(ierr); 7832 if (nis) { 7833 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7834 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7835 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7836 } 7837 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7838 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7839 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7840 for (i=0;i<nis;i++) { 7841 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7842 } 7843 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7844 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7845 } 7846 *mat_n = NULL; 7847 } 7848 PetscFunctionReturn(0); 7849 } 7850 7851 /* temporary hack into ksp private data structure */ 7852 #include <petsc/private/kspimpl.h> 7853 7854 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7855 { 7856 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7857 PC_IS *pcis = (PC_IS*)pc->data; 7858 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7859 Mat coarsedivudotp = NULL; 7860 Mat coarseG,t_coarse_mat_is; 7861 MatNullSpace CoarseNullSpace = NULL; 7862 ISLocalToGlobalMapping coarse_islg; 7863 IS coarse_is,*isarray; 7864 PetscInt i,im_active=-1,active_procs=-1; 7865 PetscInt nis,nisdofs,nisneu,nisvert; 7866 PetscInt coarse_eqs_per_proc; 7867 PC pc_temp; 7868 PCType coarse_pc_type; 7869 KSPType coarse_ksp_type; 7870 PetscBool multilevel_requested,multilevel_allowed; 7871 PetscBool coarse_reuse; 7872 PetscInt ncoarse,nedcfield; 7873 PetscBool compute_vecs = PETSC_FALSE; 7874 PetscScalar *array; 7875 MatReuse coarse_mat_reuse; 7876 PetscBool restr, full_restr, have_void; 7877 PetscMPIInt size; 7878 PetscErrorCode ierr; 7879 7880 PetscFunctionBegin; 7881 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 7882 /* Assign global numbering to coarse dofs */ 7883 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7884 PetscInt ocoarse_size; 7885 compute_vecs = PETSC_TRUE; 7886 7887 pcbddc->new_primal_space = PETSC_TRUE; 7888 ocoarse_size = pcbddc->coarse_size; 7889 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7890 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7891 /* see if we can avoid some work */ 7892 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7893 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7894 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7895 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7896 coarse_reuse = PETSC_FALSE; 7897 } else { /* we can safely reuse already computed coarse matrix */ 7898 coarse_reuse = PETSC_TRUE; 7899 } 7900 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7901 coarse_reuse = PETSC_FALSE; 7902 } 7903 /* reset any subassembling information */ 7904 if (!coarse_reuse || pcbddc->recompute_topography) { 7905 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7906 } 7907 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7908 coarse_reuse = PETSC_TRUE; 7909 } 7910 /* assemble coarse matrix */ 7911 if (coarse_reuse && pcbddc->coarse_ksp) { 7912 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7913 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7914 coarse_mat_reuse = MAT_REUSE_MATRIX; 7915 } else { 7916 coarse_mat = NULL; 7917 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7918 } 7919 7920 /* creates temporary l2gmap and IS for coarse indexes */ 7921 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7922 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7923 7924 /* creates temporary MATIS object for coarse matrix */ 7925 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7926 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7927 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7928 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7929 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7930 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7931 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7932 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7933 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7934 7935 /* count "active" (i.e. with positive local size) and "void" processes */ 7936 im_active = !!(pcis->n); 7937 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7938 7939 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7940 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7941 /* full_restr : just use the receivers from the subassembling pattern */ 7942 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7943 coarse_mat_is = NULL; 7944 multilevel_allowed = PETSC_FALSE; 7945 multilevel_requested = PETSC_FALSE; 7946 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7947 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7948 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7949 if (multilevel_requested) { 7950 ncoarse = active_procs/pcbddc->coarsening_ratio; 7951 restr = PETSC_FALSE; 7952 full_restr = PETSC_FALSE; 7953 } else { 7954 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 7955 restr = PETSC_TRUE; 7956 full_restr = PETSC_TRUE; 7957 } 7958 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7959 ncoarse = PetscMax(1,ncoarse); 7960 if (!pcbddc->coarse_subassembling) { 7961 if (pcbddc->coarsening_ratio > 1) { 7962 if (multilevel_requested) { 7963 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7964 } else { 7965 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7966 } 7967 } else { 7968 PetscMPIInt rank; 7969 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7970 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7971 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7972 } 7973 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7974 PetscInt psum; 7975 if (pcbddc->coarse_ksp) psum = 1; 7976 else psum = 0; 7977 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7978 if (ncoarse < size) have_void = PETSC_TRUE; 7979 } 7980 /* determine if we can go multilevel */ 7981 if (multilevel_requested) { 7982 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7983 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7984 } 7985 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7986 7987 /* dump subassembling pattern */ 7988 if (pcbddc->dbg_flag && multilevel_allowed) { 7989 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7990 } 7991 7992 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7993 nedcfield = -1; 7994 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7995 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7996 const PetscInt *idxs; 7997 ISLocalToGlobalMapping tmap; 7998 7999 /* create map between primal indices (in local representative ordering) and local primal numbering */ 8000 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 8001 /* allocate space for temporary storage */ 8002 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 8003 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 8004 /* allocate for IS array */ 8005 nisdofs = pcbddc->n_ISForDofsLocal; 8006 if (pcbddc->nedclocal) { 8007 if (pcbddc->nedfield > -1) { 8008 nedcfield = pcbddc->nedfield; 8009 } else { 8010 nedcfield = 0; 8011 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8012 nisdofs = 1; 8013 } 8014 } 8015 nisneu = !!pcbddc->NeumannBoundariesLocal; 8016 nisvert = 0; /* nisvert is not used */ 8017 nis = nisdofs + nisneu + nisvert; 8018 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8019 /* dofs splitting */ 8020 for (i=0;i<nisdofs;i++) { 8021 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8022 if (nedcfield != i) { 8023 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8024 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8025 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8026 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8027 } else { 8028 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8029 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8030 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8031 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8032 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8033 } 8034 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8035 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8036 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8037 } 8038 /* neumann boundaries */ 8039 if (pcbddc->NeumannBoundariesLocal) { 8040 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8041 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8042 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8043 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8044 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8045 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8046 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8047 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8048 } 8049 /* free memory */ 8050 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8051 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8052 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8053 } else { 8054 nis = 0; 8055 nisdofs = 0; 8056 nisneu = 0; 8057 nisvert = 0; 8058 isarray = NULL; 8059 } 8060 /* destroy no longer needed map */ 8061 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8062 8063 /* subassemble */ 8064 if (multilevel_allowed) { 8065 Vec vp[1]; 8066 PetscInt nvecs = 0; 8067 PetscBool reuse,reuser; 8068 8069 if (coarse_mat) reuse = PETSC_TRUE; 8070 else reuse = PETSC_FALSE; 8071 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8072 vp[0] = NULL; 8073 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8074 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8075 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8076 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8077 nvecs = 1; 8078 8079 if (pcbddc->divudotp) { 8080 Mat B,loc_divudotp; 8081 Vec v,p; 8082 IS dummy; 8083 PetscInt np; 8084 8085 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8086 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8087 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8088 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8089 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8090 ierr = VecSet(p,1.);CHKERRQ(ierr); 8091 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8092 ierr = VecDestroy(&p);CHKERRQ(ierr); 8093 ierr = MatDestroy(&B);CHKERRQ(ierr); 8094 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8095 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8096 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8097 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8098 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8099 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8100 ierr = VecDestroy(&v);CHKERRQ(ierr); 8101 } 8102 } 8103 if (reuser) { 8104 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8105 } else { 8106 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8107 } 8108 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8109 PetscScalar *arraym,*arrayv; 8110 PetscInt nl; 8111 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8113 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8114 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8115 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8116 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8117 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8118 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8119 } else { 8120 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8121 } 8122 } else { 8123 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8124 } 8125 if (coarse_mat_is || coarse_mat) { 8126 PetscMPIInt size; 8127 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8128 if (!multilevel_allowed) { 8129 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8130 } else { 8131 Mat A; 8132 8133 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8134 if (coarse_mat_is) { 8135 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8136 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8137 coarse_mat = coarse_mat_is; 8138 } 8139 /* be sure we don't have MatSeqDENSE as local mat */ 8140 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8141 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8142 } 8143 } 8144 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8145 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8146 8147 /* create local to global scatters for coarse problem */ 8148 if (compute_vecs) { 8149 PetscInt lrows; 8150 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8151 if (coarse_mat) { 8152 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8153 } else { 8154 lrows = 0; 8155 } 8156 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8157 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8158 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8159 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8160 ierr = VecScatterCreateWithData(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8161 } 8162 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8163 8164 /* set defaults for coarse KSP and PC */ 8165 if (multilevel_allowed) { 8166 coarse_ksp_type = KSPRICHARDSON; 8167 coarse_pc_type = PCBDDC; 8168 } else { 8169 coarse_ksp_type = KSPPREONLY; 8170 coarse_pc_type = PCREDUNDANT; 8171 } 8172 8173 /* print some info if requested */ 8174 if (pcbddc->dbg_flag) { 8175 if (!multilevel_allowed) { 8176 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8177 if (multilevel_requested) { 8178 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8179 } else if (pcbddc->max_levels) { 8180 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8181 } 8182 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8183 } 8184 } 8185 8186 /* communicate coarse discrete gradient */ 8187 coarseG = NULL; 8188 if (pcbddc->nedcG && multilevel_allowed) { 8189 MPI_Comm ccomm; 8190 if (coarse_mat) { 8191 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8192 } else { 8193 ccomm = MPI_COMM_NULL; 8194 } 8195 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8196 } 8197 8198 /* create the coarse KSP object only once with defaults */ 8199 if (coarse_mat) { 8200 PetscBool isredundant,isnn,isbddc; 8201 PetscViewer dbg_viewer = NULL; 8202 8203 if (pcbddc->dbg_flag) { 8204 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8205 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8206 } 8207 if (!pcbddc->coarse_ksp) { 8208 char prefix[256],str_level[16]; 8209 size_t len; 8210 8211 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8212 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8213 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8214 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8215 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8216 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8217 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8218 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8219 /* TODO is this logic correct? should check for coarse_mat type */ 8220 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8221 /* prefix */ 8222 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8223 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8224 if (!pcbddc->current_level) { 8225 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8226 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8227 } else { 8228 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8229 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8230 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8231 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8232 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8233 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8234 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8235 } 8236 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8237 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8238 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8239 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8240 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8241 /* allow user customization */ 8242 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8243 /* get some info after set from options */ 8244 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8245 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8246 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8247 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8248 if (multilevel_allowed && !isbddc && !isnn) { 8249 isbddc = PETSC_TRUE; 8250 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8251 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8252 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8253 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8254 } 8255 } 8256 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8257 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8258 if (nisdofs) { 8259 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8260 for (i=0;i<nisdofs;i++) { 8261 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8262 } 8263 } 8264 if (nisneu) { 8265 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8266 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8267 } 8268 if (nisvert) { 8269 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8270 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8271 } 8272 if (coarseG) { 8273 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8274 } 8275 8276 /* get some info after set from options */ 8277 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8278 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8279 if (isbddc && !multilevel_allowed) { 8280 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8281 isbddc = PETSC_FALSE; 8282 } 8283 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8284 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8285 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8286 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8287 isbddc = PETSC_TRUE; 8288 } 8289 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8290 if (isredundant) { 8291 KSP inner_ksp; 8292 PC inner_pc; 8293 8294 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8295 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8296 } 8297 8298 /* parameters which miss an API */ 8299 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8300 if (isbddc) { 8301 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8302 8303 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8304 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8305 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8306 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8307 if (pcbddc_coarse->benign_saddle_point) { 8308 Mat coarsedivudotp_is; 8309 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8310 IS row,col; 8311 const PetscInt *gidxs; 8312 PetscInt n,st,M,N; 8313 8314 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8315 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8316 st = st-n; 8317 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8318 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8319 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8320 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8321 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8322 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8323 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8324 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8325 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8326 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8327 ierr = ISDestroy(&row);CHKERRQ(ierr); 8328 ierr = ISDestroy(&col);CHKERRQ(ierr); 8329 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8330 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8331 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8332 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8333 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8334 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8335 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8336 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8337 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8338 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8339 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8340 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8341 } 8342 } 8343 8344 /* propagate symmetry info of coarse matrix */ 8345 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8346 if (pc->pmat->symmetric_set) { 8347 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8348 } 8349 if (pc->pmat->hermitian_set) { 8350 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8351 } 8352 if (pc->pmat->spd_set) { 8353 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8354 } 8355 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8356 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8357 } 8358 /* set operators */ 8359 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8360 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8361 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8362 if (pcbddc->dbg_flag) { 8363 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8364 } 8365 } 8366 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8367 ierr = PetscFree(isarray);CHKERRQ(ierr); 8368 #if 0 8369 { 8370 PetscViewer viewer; 8371 char filename[256]; 8372 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8373 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8374 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8375 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8376 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8377 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8378 } 8379 #endif 8380 8381 if (pcbddc->coarse_ksp) { 8382 Vec crhs,csol; 8383 8384 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8385 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8386 if (!csol) { 8387 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8388 } 8389 if (!crhs) { 8390 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8391 } 8392 } 8393 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8394 8395 /* compute null space for coarse solver if the benign trick has been requested */ 8396 if (pcbddc->benign_null) { 8397 8398 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8399 for (i=0;i<pcbddc->benign_n;i++) { 8400 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8401 } 8402 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8403 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8404 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8405 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8406 if (coarse_mat) { 8407 Vec nullv; 8408 PetscScalar *array,*array2; 8409 PetscInt nl; 8410 8411 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8412 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8413 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8414 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8415 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8416 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8417 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8418 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8419 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8420 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8421 } 8422 } 8423 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8424 8425 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8426 if (pcbddc->coarse_ksp) { 8427 PetscBool ispreonly; 8428 8429 if (CoarseNullSpace) { 8430 PetscBool isnull; 8431 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8432 if (isnull) { 8433 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8434 } 8435 /* TODO: add local nullspaces (if any) */ 8436 } 8437 /* setup coarse ksp */ 8438 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8439 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8440 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8441 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8442 KSP check_ksp; 8443 KSPType check_ksp_type; 8444 PC check_pc; 8445 Vec check_vec,coarse_vec; 8446 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8447 PetscInt its; 8448 PetscBool compute_eigs; 8449 PetscReal *eigs_r,*eigs_c; 8450 PetscInt neigs; 8451 const char *prefix; 8452 8453 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8454 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8455 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8456 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8457 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8458 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8459 /* prevent from setup unneeded object */ 8460 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8461 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8462 if (ispreonly) { 8463 check_ksp_type = KSPPREONLY; 8464 compute_eigs = PETSC_FALSE; 8465 } else { 8466 check_ksp_type = KSPGMRES; 8467 compute_eigs = PETSC_TRUE; 8468 } 8469 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8470 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8471 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8472 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8473 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8474 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8475 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8476 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8477 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8478 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8479 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8480 /* create random vec */ 8481 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8482 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8483 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8484 /* solve coarse problem */ 8485 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8486 /* set eigenvalue estimation if preonly has not been requested */ 8487 if (compute_eigs) { 8488 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8489 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8490 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8491 if (neigs) { 8492 lambda_max = eigs_r[neigs-1]; 8493 lambda_min = eigs_r[0]; 8494 if (pcbddc->use_coarse_estimates) { 8495 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8496 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8497 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8498 } 8499 } 8500 } 8501 } 8502 8503 /* check coarse problem residual error */ 8504 if (pcbddc->dbg_flag) { 8505 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8506 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8507 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8508 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8509 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8510 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8511 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8512 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8513 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8514 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8515 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8516 if (CoarseNullSpace) { 8517 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8518 } 8519 if (compute_eigs) { 8520 PetscReal lambda_max_s,lambda_min_s; 8521 KSPConvergedReason reason; 8522 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8523 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8524 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8525 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8526 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8527 for (i=0;i<neigs;i++) { 8528 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8529 } 8530 } 8531 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8532 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8533 } 8534 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8535 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8536 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8537 if (compute_eigs) { 8538 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8539 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8540 } 8541 } 8542 } 8543 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8544 /* print additional info */ 8545 if (pcbddc->dbg_flag) { 8546 /* waits until all processes reaches this point */ 8547 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8548 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8549 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8550 } 8551 8552 /* free memory */ 8553 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8554 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8555 PetscFunctionReturn(0); 8556 } 8557 8558 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8559 { 8560 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8561 PC_IS* pcis = (PC_IS*)pc->data; 8562 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8563 IS subset,subset_mult,subset_n; 8564 PetscInt local_size,coarse_size=0; 8565 PetscInt *local_primal_indices=NULL; 8566 const PetscInt *t_local_primal_indices; 8567 PetscErrorCode ierr; 8568 8569 PetscFunctionBegin; 8570 /* Compute global number of coarse dofs */ 8571 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8572 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8573 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8574 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8575 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8576 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8577 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8578 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8579 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8580 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8581 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8582 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8583 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8584 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8585 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8586 8587 /* check numbering */ 8588 if (pcbddc->dbg_flag) { 8589 PetscScalar coarsesum,*array,*array2; 8590 PetscInt i; 8591 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8592 8593 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8594 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8595 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8596 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8597 /* counter */ 8598 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8599 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8600 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8601 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8602 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8603 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8604 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8605 for (i=0;i<pcbddc->local_primal_size;i++) { 8606 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8607 } 8608 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8609 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8610 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8611 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8612 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8613 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8614 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8615 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8616 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8617 for (i=0;i<pcis->n;i++) { 8618 if (array[i] != 0.0 && array[i] != array2[i]) { 8619 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8620 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8621 set_error = PETSC_TRUE; 8622 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8623 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8624 } 8625 } 8626 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8627 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8628 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8629 for (i=0;i<pcis->n;i++) { 8630 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8631 } 8632 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8633 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8634 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8635 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8636 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8638 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8639 PetscInt *gidxs; 8640 8641 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8642 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8643 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8644 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8645 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8646 for (i=0;i<pcbddc->local_primal_size;i++) { 8647 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8648 } 8649 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8650 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8651 } 8652 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8653 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8654 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8655 } 8656 8657 /* get back data */ 8658 *coarse_size_n = coarse_size; 8659 *local_primal_indices_n = local_primal_indices; 8660 PetscFunctionReturn(0); 8661 } 8662 8663 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8664 { 8665 IS localis_t; 8666 PetscInt i,lsize,*idxs,n; 8667 PetscScalar *vals; 8668 PetscErrorCode ierr; 8669 8670 PetscFunctionBegin; 8671 /* get indices in local ordering exploiting local to global map */ 8672 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8673 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8674 for (i=0;i<lsize;i++) vals[i] = 1.0; 8675 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8676 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8677 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8678 if (idxs) { /* multilevel guard */ 8679 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8680 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8681 } 8682 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8683 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8684 ierr = PetscFree(vals);CHKERRQ(ierr); 8685 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8686 /* now compute set in local ordering */ 8687 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8688 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8689 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8690 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8691 for (i=0,lsize=0;i<n;i++) { 8692 if (PetscRealPart(vals[i]) > 0.5) { 8693 lsize++; 8694 } 8695 } 8696 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8697 for (i=0,lsize=0;i<n;i++) { 8698 if (PetscRealPart(vals[i]) > 0.5) { 8699 idxs[lsize++] = i; 8700 } 8701 } 8702 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8703 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8704 *localis = localis_t; 8705 PetscFunctionReturn(0); 8706 } 8707 8708 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8709 { 8710 PC_IS *pcis=(PC_IS*)pc->data; 8711 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8712 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8713 Mat S_j; 8714 PetscInt *used_xadj,*used_adjncy; 8715 PetscBool free_used_adj; 8716 PetscErrorCode ierr; 8717 8718 PetscFunctionBegin; 8719 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8720 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8721 free_used_adj = PETSC_FALSE; 8722 if (pcbddc->sub_schurs_layers == -1) { 8723 used_xadj = NULL; 8724 used_adjncy = NULL; 8725 } else { 8726 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8727 used_xadj = pcbddc->mat_graph->xadj; 8728 used_adjncy = pcbddc->mat_graph->adjncy; 8729 } else if (pcbddc->computed_rowadj) { 8730 used_xadj = pcbddc->mat_graph->xadj; 8731 used_adjncy = pcbddc->mat_graph->adjncy; 8732 } else { 8733 PetscBool flg_row=PETSC_FALSE; 8734 const PetscInt *xadj,*adjncy; 8735 PetscInt nvtxs; 8736 8737 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8738 if (flg_row) { 8739 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8740 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8741 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8742 free_used_adj = PETSC_TRUE; 8743 } else { 8744 pcbddc->sub_schurs_layers = -1; 8745 used_xadj = NULL; 8746 used_adjncy = NULL; 8747 } 8748 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8749 } 8750 } 8751 8752 /* setup sub_schurs data */ 8753 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8754 if (!sub_schurs->schur_explicit) { 8755 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8756 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8757 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8758 } else { 8759 Mat change = NULL; 8760 Vec scaling = NULL; 8761 IS change_primal = NULL, iP; 8762 PetscInt benign_n; 8763 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8764 PetscBool isseqaij,need_change = PETSC_FALSE; 8765 PetscBool discrete_harmonic = PETSC_FALSE; 8766 8767 if (!pcbddc->use_vertices && reuse_solvers) { 8768 PetscInt n_vertices; 8769 8770 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8771 reuse_solvers = (PetscBool)!n_vertices; 8772 } 8773 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8774 if (!isseqaij) { 8775 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8776 if (matis->A == pcbddc->local_mat) { 8777 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8778 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8779 } else { 8780 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8781 } 8782 } 8783 if (!pcbddc->benign_change_explicit) { 8784 benign_n = pcbddc->benign_n; 8785 } else { 8786 benign_n = 0; 8787 } 8788 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8789 We need a global reduction to avoid possible deadlocks. 8790 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8791 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8792 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8793 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8794 need_change = (PetscBool)(!need_change); 8795 } 8796 /* If the user defines additional constraints, we import them here. 8797 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8798 if (need_change) { 8799 PC_IS *pcisf; 8800 PC_BDDC *pcbddcf; 8801 PC pcf; 8802 8803 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8804 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8805 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8806 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8807 8808 /* hacks */ 8809 pcisf = (PC_IS*)pcf->data; 8810 pcisf->is_B_local = pcis->is_B_local; 8811 pcisf->vec1_N = pcis->vec1_N; 8812 pcisf->BtoNmap = pcis->BtoNmap; 8813 pcisf->n = pcis->n; 8814 pcisf->n_B = pcis->n_B; 8815 pcbddcf = (PC_BDDC*)pcf->data; 8816 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8817 pcbddcf->mat_graph = pcbddc->mat_graph; 8818 pcbddcf->use_faces = PETSC_TRUE; 8819 pcbddcf->use_change_of_basis = PETSC_TRUE; 8820 pcbddcf->use_change_on_faces = PETSC_TRUE; 8821 pcbddcf->use_qr_single = PETSC_TRUE; 8822 pcbddcf->fake_change = PETSC_TRUE; 8823 8824 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8825 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8826 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8827 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8828 change = pcbddcf->ConstraintMatrix; 8829 pcbddcf->ConstraintMatrix = NULL; 8830 8831 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8832 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8833 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8834 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8835 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8836 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8837 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8838 pcf->ops->destroy = NULL; 8839 pcf->ops->reset = NULL; 8840 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8841 } 8842 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8843 8844 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8845 if (iP) { 8846 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8847 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8848 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8849 } 8850 if (discrete_harmonic) { 8851 Mat A; 8852 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8853 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8854 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8855 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8856 ierr = MatDestroy(&A);CHKERRQ(ierr); 8857 } else { 8858 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8859 } 8860 ierr = MatDestroy(&change);CHKERRQ(ierr); 8861 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8862 } 8863 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8864 8865 /* free adjacency */ 8866 if (free_used_adj) { 8867 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8868 } 8869 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8870 PetscFunctionReturn(0); 8871 } 8872 8873 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8874 { 8875 PC_IS *pcis=(PC_IS*)pc->data; 8876 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8877 PCBDDCGraph graph; 8878 PetscErrorCode ierr; 8879 8880 PetscFunctionBegin; 8881 /* attach interface graph for determining subsets */ 8882 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8883 IS verticesIS,verticescomm; 8884 PetscInt vsize,*idxs; 8885 8886 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8887 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8888 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8889 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8890 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8891 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8892 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8893 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8894 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8895 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8896 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8897 } else { 8898 graph = pcbddc->mat_graph; 8899 } 8900 /* print some info */ 8901 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8902 IS vertices; 8903 PetscInt nv,nedges,nfaces; 8904 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8905 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8906 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8907 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8908 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8909 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 8910 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 8911 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8912 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8913 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8914 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8915 } 8916 8917 /* sub_schurs init */ 8918 if (!pcbddc->sub_schurs) { 8919 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8920 } 8921 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8922 8923 /* free graph struct */ 8924 if (pcbddc->sub_schurs_rebuild) { 8925 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8926 } 8927 PetscFunctionReturn(0); 8928 } 8929 8930 PetscErrorCode PCBDDCCheckOperator(PC pc) 8931 { 8932 PC_IS *pcis=(PC_IS*)pc->data; 8933 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8934 PetscErrorCode ierr; 8935 8936 PetscFunctionBegin; 8937 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8938 IS zerodiag = NULL; 8939 Mat S_j,B0_B=NULL; 8940 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8941 PetscScalar *p0_check,*array,*array2; 8942 PetscReal norm; 8943 PetscInt i; 8944 8945 /* B0 and B0_B */ 8946 if (zerodiag) { 8947 IS dummy; 8948 8949 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8950 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8951 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8952 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8953 } 8954 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8955 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8956 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8957 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8958 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8959 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8960 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8961 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8962 /* S_j */ 8963 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8964 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8965 8966 /* mimic vector in \widetilde{W}_\Gamma */ 8967 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8968 /* continuous in primal space */ 8969 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8970 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8971 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8972 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8973 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8974 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8975 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8976 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8977 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8978 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8979 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8980 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8981 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8982 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8983 8984 /* assemble rhs for coarse problem */ 8985 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8986 /* local with Schur */ 8987 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8988 if (zerodiag) { 8989 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8990 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8991 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8992 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8993 } 8994 /* sum on primal nodes the local contributions */ 8995 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8996 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8997 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8998 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8999 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 9000 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 9001 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 9002 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 9003 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9004 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9005 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9006 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9007 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9008 /* scale primal nodes (BDDC sums contibutions) */ 9009 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9010 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9011 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9012 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9013 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9014 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9015 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9016 /* global: \widetilde{B0}_B w_\Gamma */ 9017 if (zerodiag) { 9018 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9019 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9020 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9021 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9022 } 9023 /* BDDC */ 9024 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9025 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9026 9027 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9028 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9029 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9030 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9031 for (i=0;i<pcbddc->benign_n;i++) { 9032 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9033 } 9034 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9035 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9036 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9037 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9038 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9039 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9040 } 9041 PetscFunctionReturn(0); 9042 } 9043 9044 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9045 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9046 { 9047 Mat At; 9048 IS rows; 9049 PetscInt rst,ren; 9050 PetscErrorCode ierr; 9051 PetscLayout rmap; 9052 9053 PetscFunctionBegin; 9054 rst = ren = 0; 9055 if (ccomm != MPI_COMM_NULL) { 9056 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9057 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9058 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9059 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9060 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9061 } 9062 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9063 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9064 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9065 9066 if (ccomm != MPI_COMM_NULL) { 9067 Mat_MPIAIJ *a,*b; 9068 IS from,to; 9069 Vec gvec; 9070 PetscInt lsize; 9071 9072 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9073 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9074 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9075 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9076 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9077 a = (Mat_MPIAIJ*)At->data; 9078 b = (Mat_MPIAIJ*)(*B)->data; 9079 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9080 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9081 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9082 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9083 b->A = a->A; 9084 b->B = a->B; 9085 9086 b->donotstash = a->donotstash; 9087 b->roworiented = a->roworiented; 9088 b->rowindices = 0; 9089 b->rowvalues = 0; 9090 b->getrowactive = PETSC_FALSE; 9091 9092 (*B)->rmap = rmap; 9093 (*B)->factortype = A->factortype; 9094 (*B)->assembled = PETSC_TRUE; 9095 (*B)->insertmode = NOT_SET_VALUES; 9096 (*B)->preallocated = PETSC_TRUE; 9097 9098 if (a->colmap) { 9099 #if defined(PETSC_USE_CTABLE) 9100 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9101 #else 9102 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9103 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9104 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9105 #endif 9106 } else b->colmap = 0; 9107 if (a->garray) { 9108 PetscInt len; 9109 len = a->B->cmap->n; 9110 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9111 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9112 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9113 } else b->garray = 0; 9114 9115 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9116 b->lvec = a->lvec; 9117 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9118 9119 /* cannot use VecScatterCopy */ 9120 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9121 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9122 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9123 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9124 ierr = VecScatterCreateWithData(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9125 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9126 ierr = ISDestroy(&from);CHKERRQ(ierr); 9127 ierr = ISDestroy(&to);CHKERRQ(ierr); 9128 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9129 } 9130 ierr = MatDestroy(&At);CHKERRQ(ierr); 9131 PetscFunctionReturn(0); 9132 } 9133