1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 458 459 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 460 for proper detection of coarse edges' endpoints */ 461 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 462 for (i=0;i<ne;i++) { 463 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 464 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 465 } 466 } 467 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 468 if (!conforming) { 469 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 470 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 471 } 472 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 473 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 474 cum = 0; 475 for (i=0;i<ne;i++) { 476 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 477 if (!PetscBTLookup(btee,i)) { 478 marks[cum++] = i; 479 continue; 480 } 481 /* set badly connected edge dofs as primal */ 482 if (!conforming) { 483 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 484 marks[cum++] = i; 485 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 486 for (j=ii[i];j<ii[i+1];j++) { 487 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 488 } 489 } else { 490 /* every edge dofs should be connected trough a certain number of nodal dofs 491 to other edge dofs belonging to coarse edges 492 - at most 2 endpoints 493 - order-1 interior nodal dofs 494 - no undefined nodal dofs (nconn < order) 495 */ 496 PetscInt ends = 0,ints = 0, undef = 0; 497 for (j=ii[i];j<ii[i+1];j++) { 498 PetscInt v = jj[j],k; 499 PetscInt nconn = iit[v+1]-iit[v]; 500 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 501 if (nconn > order) ends++; 502 else if (nconn == order) ints++; 503 else undef++; 504 } 505 if (undef || ends > 2 || ints != order -1) { 506 marks[cum++] = i; 507 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 508 for (j=ii[i];j<ii[i+1];j++) { 509 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 510 } 511 } 512 } 513 } 514 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 515 if (!order && ii[i+1] != ii[i]) { 516 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 517 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 518 } 519 } 520 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 521 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 522 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 523 if (!conforming) { 524 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 525 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 526 } 527 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 528 529 /* identify splitpoints and corner candidates */ 530 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 531 if (print) { 532 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 533 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 534 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 535 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 536 } 537 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 538 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 539 for (i=0;i<nv;i++) { 540 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 541 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 542 if (!order) { /* variable order */ 543 PetscReal vorder = 0.; 544 545 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 546 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 547 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 548 ord = 1; 549 } 550 #if defined(PETSC_USE_DEBUG) 551 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 552 #endif 553 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 554 if (PetscBTLookup(btbd,jj[j])) { 555 bdir = PETSC_TRUE; 556 break; 557 } 558 if (vc != ecount[jj[j]]) { 559 sneighs = PETSC_FALSE; 560 } else { 561 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 562 for (k=0;k<vc;k++) { 563 if (vn[k] != en[k]) { 564 sneighs = PETSC_FALSE; 565 break; 566 } 567 } 568 } 569 } 570 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 571 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 572 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 573 } else if (test == ord) { 574 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 576 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 577 } else { 578 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 579 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 580 } 581 } 582 } 583 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 584 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 585 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 586 587 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 588 if (order != 1) { 589 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 590 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 591 for (i=0;i<nv;i++) { 592 if (PetscBTLookup(btvcand,i)) { 593 PetscBool found = PETSC_FALSE; 594 for (j=ii[i];j<ii[i+1] && !found;j++) { 595 PetscInt k,e = jj[j]; 596 if (PetscBTLookup(bte,e)) continue; 597 for (k=iit[e];k<iit[e+1];k++) { 598 PetscInt v = jjt[k]; 599 if (v != i && PetscBTLookup(btvcand,v)) { 600 found = PETSC_TRUE; 601 break; 602 } 603 } 604 } 605 if (!found) { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 607 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 608 } else { 609 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 610 } 611 } 612 } 613 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 614 } 615 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 616 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 617 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 618 619 /* Get the local G^T explicitly */ 620 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 621 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 622 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 623 624 /* Mark interior nodal dofs */ 625 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 626 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 627 for (i=1;i<n_neigh;i++) { 628 for (j=0;j<n_shared[i];j++) { 629 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 630 } 631 } 632 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 633 634 /* communicate corners and splitpoints */ 635 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 636 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 638 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 639 640 if (print) { 641 IS tbz; 642 643 cum = 0; 644 for (i=0;i<nv;i++) 645 if (sfvleaves[i]) 646 vmarks[cum++] = i; 647 648 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 649 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 650 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 651 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 652 } 653 654 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 655 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 656 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 657 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 658 659 /* Zero rows of lGt corresponding to identified corners 660 and interior nodal dofs */ 661 cum = 0; 662 for (i=0;i<nv;i++) { 663 if (sfvleaves[i]) { 664 vmarks[cum++] = i; 665 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 666 } 667 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 668 } 669 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 670 if (print) { 671 IS tbz; 672 673 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 674 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 675 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 676 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 677 } 678 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 679 ierr = PetscFree(vmarks);CHKERRQ(ierr); 680 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 681 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 682 683 /* Recompute G */ 684 ierr = MatDestroy(&lG);CHKERRQ(ierr); 685 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 686 if (print) { 687 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 688 ierr = MatView(lG,NULL);CHKERRQ(ierr); 689 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 690 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 691 } 692 693 /* Get primal dofs (if any) */ 694 cum = 0; 695 for (i=0;i<ne;i++) { 696 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 697 } 698 if (fl2g) { 699 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 700 } 701 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 702 if (print) { 703 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 704 ierr = ISView(primals,NULL);CHKERRQ(ierr); 705 } 706 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 707 /* TODO: what if the user passed in some of them ? */ 708 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 709 ierr = ISDestroy(&primals);CHKERRQ(ierr); 710 711 /* Compute edge connectivity */ 712 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 713 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 714 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 715 if (fl2g) { 716 PetscBT btf; 717 PetscInt *iia,*jja,*iiu,*jju; 718 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 719 720 /* create CSR for all local dofs */ 721 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 722 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 723 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 724 iiu = pcbddc->mat_graph->xadj; 725 jju = pcbddc->mat_graph->adjncy; 726 } else if (pcbddc->use_local_adj) { 727 rest = PETSC_TRUE; 728 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 729 } else { 730 free = PETSC_TRUE; 731 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 732 iiu[0] = 0; 733 for (i=0;i<n;i++) { 734 iiu[i+1] = i+1; 735 jju[i] = -1; 736 } 737 } 738 739 /* import sizes of CSR */ 740 iia[0] = 0; 741 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 742 743 /* overwrite entries corresponding to the Nedelec field */ 744 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 745 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 746 for (i=0;i<ne;i++) { 747 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 748 iia[idxs[i]+1] = ii[i+1]-ii[i]; 749 } 750 751 /* iia in CSR */ 752 for (i=0;i<n;i++) iia[i+1] += iia[i]; 753 754 /* jja in CSR */ 755 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 756 for (i=0;i<n;i++) 757 if (!PetscBTLookup(btf,i)) 758 for (j=0;j<iiu[i+1]-iiu[i];j++) 759 jja[iia[i]+j] = jju[iiu[i]+j]; 760 761 /* map edge dofs connectivity */ 762 if (jj) { 763 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 764 for (i=0;i<ne;i++) { 765 PetscInt e = idxs[i]; 766 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 767 } 768 } 769 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 770 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 771 if (rest) { 772 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 773 } 774 if (free) { 775 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 776 } 777 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 778 } else { 779 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 780 } 781 782 /* Analyze interface for edge dofs */ 783 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 784 pcbddc->mat_graph->twodim = PETSC_FALSE; 785 786 /* Get coarse edges in the edge space */ 787 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 788 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 789 790 if (fl2g) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 792 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 793 for (i=0;i<nee;i++) { 794 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 795 } 796 } else { 797 eedges = alleedges; 798 primals = allprimals; 799 } 800 801 /* Mark fine edge dofs with their coarse edge id */ 802 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 803 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 804 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 805 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 806 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 807 if (print) { 808 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 809 ierr = ISView(primals,NULL);CHKERRQ(ierr); 810 } 811 812 maxsize = 0; 813 for (i=0;i<nee;i++) { 814 PetscInt size,mark = i+1; 815 816 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 817 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 818 for (j=0;j<size;j++) marks[idxs[j]] = mark; 819 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 820 maxsize = PetscMax(maxsize,size); 821 } 822 823 /* Find coarse edge endpoints */ 824 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 825 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 826 for (i=0;i<nee;i++) { 827 PetscInt mark = i+1,size; 828 829 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 830 if (!size && nedfieldlocal) continue; 831 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 832 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 833 if (print) { 834 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 835 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 836 } 837 for (j=0;j<size;j++) { 838 PetscInt k, ee = idxs[j]; 839 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 840 for (k=ii[ee];k<ii[ee+1];k++) { 841 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 842 if (PetscBTLookup(btv,jj[k])) { 843 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 844 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 845 PetscInt k2; 846 PetscBool corner = PETSC_FALSE; 847 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 848 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 849 /* it's a corner if either is connected with an edge dof belonging to a different cc or 850 if the edge dof lie on the natural part of the boundary */ 851 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 852 corner = PETSC_TRUE; 853 break; 854 } 855 } 856 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 857 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 858 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 859 } else { 860 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 861 } 862 } 863 } 864 } 865 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 866 } 867 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 868 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 869 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 870 871 /* Reset marked primal dofs */ 872 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 873 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 874 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 875 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 876 877 /* Now use the initial lG */ 878 ierr = MatDestroy(&lG);CHKERRQ(ierr); 879 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 880 lG = lGinit; 881 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 882 883 /* Compute extended cols indices */ 884 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 885 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 886 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 887 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 888 i *= maxsize; 889 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 890 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 891 eerr = PETSC_FALSE; 892 for (i=0;i<nee;i++) { 893 PetscInt size,found = 0; 894 895 cum = 0; 896 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 897 if (!size && nedfieldlocal) continue; 898 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 899 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 901 for (j=0;j<size;j++) { 902 PetscInt k,ee = idxs[j]; 903 for (k=ii[ee];k<ii[ee+1];k++) { 904 PetscInt vv = jj[k]; 905 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 906 else if (!PetscBTLookupSet(btvc,vv)) found++; 907 } 908 } 909 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 910 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 911 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 912 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 914 /* it may happen that endpoints are not defined at this point 915 if it is the case, mark this edge for a second pass */ 916 if (cum != size -1 || found != 2) { 917 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 918 if (print) { 919 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 920 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 921 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 922 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 923 } 924 eerr = PETSC_TRUE; 925 } 926 } 927 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 928 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 929 if (done) { 930 PetscInt *newprimals; 931 932 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 933 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 934 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 935 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 936 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 937 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 938 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 939 for (i=0;i<nee;i++) { 940 PetscBool has_candidates = PETSC_FALSE; 941 if (PetscBTLookup(bter,i)) { 942 PetscInt size,mark = i+1; 943 944 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 945 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 946 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 947 for (j=0;j<size;j++) { 948 PetscInt k,ee = idxs[j]; 949 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 950 for (k=ii[ee];k<ii[ee+1];k++) { 951 /* set all candidates located on the edge as corners */ 952 if (PetscBTLookup(btvcand,jj[k])) { 953 PetscInt k2,vv = jj[k]; 954 has_candidates = PETSC_TRUE; 955 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 956 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 957 /* set all edge dofs connected to candidate as primals */ 958 for (k2=iit[vv];k2<iit[vv+1];k2++) { 959 if (marks[jjt[k2]] == mark) { 960 PetscInt k3,ee2 = jjt[k2]; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 962 newprimals[cum++] = ee2; 963 /* finally set the new corners */ 964 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 966 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 967 } 968 } 969 } 970 } else { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 972 } 973 } 974 } 975 if (!has_candidates) { /* circular edge */ 976 PetscInt k, ee = idxs[0],*tmarks; 977 978 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 979 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 980 for (k=ii[ee];k<ii[ee+1];k++) { 981 PetscInt k2; 982 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 983 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 984 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 985 } 986 for (j=0;j<size;j++) { 987 if (tmarks[idxs[j]] > 1) { 988 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 989 newprimals[cum++] = idxs[j]; 990 } 991 } 992 ierr = PetscFree(tmarks);CHKERRQ(ierr); 993 } 994 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 995 } 996 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 997 } 998 ierr = PetscFree(extcols);CHKERRQ(ierr); 999 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1000 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1001 if (fl2g) { 1002 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1003 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1004 for (i=0;i<nee;i++) { 1005 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(eedges);CHKERRQ(ierr); 1008 } 1009 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1010 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1011 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1012 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1013 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1014 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1015 pcbddc->mat_graph->twodim = PETSC_FALSE; 1016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1017 if (fl2g) { 1018 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1020 for (i=0;i<nee;i++) { 1021 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1022 } 1023 } else { 1024 eedges = alleedges; 1025 primals = allprimals; 1026 } 1027 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1028 1029 /* Mark again */ 1030 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1031 for (i=0;i<nee;i++) { 1032 PetscInt size,mark = i+1; 1033 1034 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1035 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1036 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1037 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1038 } 1039 if (print) { 1040 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1041 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1042 } 1043 1044 /* Recompute extended cols */ 1045 eerr = PETSC_FALSE; 1046 for (i=0;i<nee;i++) { 1047 PetscInt size; 1048 1049 cum = 0; 1050 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1051 if (!size && nedfieldlocal) continue; 1052 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1053 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 for (j=0;j<size;j++) { 1055 PetscInt k,ee = idxs[j]; 1056 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1057 } 1058 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1060 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1061 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1063 if (cum != size -1) { 1064 if (print) { 1065 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1066 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1067 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1068 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1069 } 1070 eerr = PETSC_TRUE; 1071 } 1072 } 1073 } 1074 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1075 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1076 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1077 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1078 /* an error should not occur at this point */ 1079 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1080 1081 /* Check the number of endpoints */ 1082 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1083 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1084 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size, found = 0, gc[2]; 1087 1088 /* init with defaults */ 1089 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 if (!size && nedfieldlocal) continue; 1092 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1093 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) { 1096 PetscInt k,ee = idxs[j]; 1097 for (k=ii[ee];k<ii[ee+1];k++) { 1098 PetscInt vv = jj[k]; 1099 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1100 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1101 corners[i*2+found++] = vv; 1102 } 1103 } 1104 } 1105 if (found != 2) { 1106 PetscInt e; 1107 if (fl2g) { 1108 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1109 } else { 1110 e = idxs[0]; 1111 } 1112 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1113 } 1114 1115 /* get primal dof index on this coarse edge */ 1116 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1117 if (gc[0] > gc[1]) { 1118 PetscInt swap = corners[2*i]; 1119 corners[2*i] = corners[2*i+1]; 1120 corners[2*i+1] = swap; 1121 } 1122 cedges[i] = idxs[size-1]; 1123 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1124 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1125 } 1126 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1128 1129 #if defined(PETSC_USE_DEBUG) 1130 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1131 not interfere with neighbouring coarse edges */ 1132 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1133 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 for (i=0;i<nv;i++) { 1135 PetscInt emax = 0,eemax = 0; 1136 1137 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1138 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1139 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1140 for (j=1;j<nee+1;j++) { 1141 if (emax < emarks[j]) { 1142 emax = emarks[j]; 1143 eemax = j; 1144 } 1145 } 1146 /* not relevant for edges */ 1147 if (!eemax) continue; 1148 1149 for (j=ii[i];j<ii[i+1];j++) { 1150 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1151 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1152 } 1153 } 1154 } 1155 ierr = PetscFree(emarks);CHKERRQ(ierr); 1156 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1157 #endif 1158 1159 /* Compute extended rows indices for edge blocks of the change of basis */ 1160 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1161 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1162 extmem *= maxsize; 1163 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1165 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1166 for (i=0;i<nv;i++) { 1167 PetscInt mark = 0,size,start; 1168 1169 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1170 for (j=ii[i];j<ii[i+1];j++) 1171 if (marks[jj[j]] && !mark) 1172 mark = marks[jj[j]]; 1173 1174 /* not relevant */ 1175 if (!mark) continue; 1176 1177 /* import extended row */ 1178 mark--; 1179 start = mark*extmem+extrowcum[mark]; 1180 size = ii[i+1]-ii[i]; 1181 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1182 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1183 extrowcum[mark] += size; 1184 } 1185 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1187 ierr = PetscFree(marks);CHKERRQ(ierr); 1188 1189 /* Compress extrows */ 1190 cum = 0; 1191 for (i=0;i<nee;i++) { 1192 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1193 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1195 cum = PetscMax(cum,size); 1196 } 1197 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1198 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1199 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1200 1201 /* Workspace for lapack inner calls and VecSetValues */ 1202 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1203 1204 /* Create change of basis matrix (preallocation can be improved) */ 1205 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1206 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1207 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1208 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1209 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1210 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1211 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1212 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1213 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1214 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1215 1216 /* Defaults to identity */ 1217 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1218 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1219 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1221 1222 /* Create discrete gradient for the coarser level if needed */ 1223 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1224 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1225 if (pcbddc->current_level < pcbddc->max_levels) { 1226 ISLocalToGlobalMapping cel2g,cvl2g; 1227 IS wis,gwis; 1228 PetscInt cnv,cne; 1229 1230 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1231 if (fl2g) { 1232 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1235 pcbddc->nedclocal = wis; 1236 } 1237 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1238 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1239 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1240 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1241 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1242 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1243 1244 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1246 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1247 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1248 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1249 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1250 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1251 1252 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1253 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1254 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1255 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1256 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1257 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1258 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1259 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1260 } 1261 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1262 1263 #if defined(PRINT_GDET) 1264 inc = 0; 1265 lev = pcbddc->current_level; 1266 #endif 1267 1268 /* Insert values in the change of basis matrix */ 1269 for (i=0;i<nee;i++) { 1270 Mat Gins = NULL, GKins = NULL; 1271 IS cornersis = NULL; 1272 PetscScalar cvals[2]; 1273 1274 if (pcbddc->nedcG) { 1275 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1276 } 1277 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1278 if (Gins && GKins) { 1279 PetscScalar *data; 1280 const PetscInt *rows,*cols; 1281 PetscInt nrh,nch,nrc,ncc; 1282 1283 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1284 /* H1 */ 1285 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1286 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1287 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1288 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1289 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1291 /* complement */ 1292 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1293 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1294 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1295 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1296 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1297 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1299 1300 /* coarse discrete gradient */ 1301 if (pcbddc->nedcG) { 1302 PetscInt cols[2]; 1303 1304 cols[0] = 2*i; 1305 cols[1] = 2*i+1; 1306 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1307 } 1308 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1309 } 1310 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1311 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1312 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1313 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1314 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1317 1318 /* Start assembling */ 1319 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1320 if (pcbddc->nedcG) { 1321 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 } 1323 1324 /* Free */ 1325 if (fl2g) { 1326 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1327 for (i=0;i<nee;i++) { 1328 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1329 } 1330 ierr = PetscFree(eedges);CHKERRQ(ierr); 1331 } 1332 1333 /* hack mat_graph with primal dofs on the coarse edges */ 1334 { 1335 PCBDDCGraph graph = pcbddc->mat_graph; 1336 PetscInt *oqueue = graph->queue; 1337 PetscInt *ocptr = graph->cptr; 1338 PetscInt ncc,*idxs; 1339 1340 /* find first primal edge */ 1341 if (pcbddc->nedclocal) { 1342 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1343 } else { 1344 if (fl2g) { 1345 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1346 } 1347 idxs = cedges; 1348 } 1349 cum = 0; 1350 while (cum < nee && cedges[cum] < 0) cum++; 1351 1352 /* adapt connected components */ 1353 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1354 graph->cptr[0] = 0; 1355 for (i=0,ncc=0;i<graph->ncc;i++) { 1356 PetscInt lc = ocptr[i+1]-ocptr[i]; 1357 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1358 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1359 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1360 ncc++; 1361 lc--; 1362 cum++; 1363 while (cum < nee && cedges[cum] < 0) cum++; 1364 } 1365 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1366 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1367 ncc++; 1368 } 1369 graph->ncc = ncc; 1370 if (pcbddc->nedclocal) { 1371 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1372 } 1373 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1376 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1377 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1378 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1379 1380 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1381 ierr = PetscFree(extrow);CHKERRQ(ierr); 1382 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1383 ierr = PetscFree(corners);CHKERRQ(ierr); 1384 ierr = PetscFree(cedges);CHKERRQ(ierr); 1385 ierr = PetscFree(extrows);CHKERRQ(ierr); 1386 ierr = PetscFree(extcols);CHKERRQ(ierr); 1387 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1388 1389 /* Complete assembling */ 1390 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (pcbddc->nedcG) { 1392 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 #if 0 1394 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1395 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1396 #endif 1397 } 1398 1399 /* set change of basis */ 1400 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1401 ierr = MatDestroy(&T);CHKERRQ(ierr); 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* the near-null space of BDDC carries information on quadrature weights, 1407 and these can be collinear -> so cheat with MatNullSpaceCreate 1408 and create a suitable set of basis vectors first */ 1409 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1410 { 1411 PetscErrorCode ierr; 1412 PetscInt i; 1413 1414 PetscFunctionBegin; 1415 for (i=0;i<nvecs;i++) { 1416 PetscInt first,last; 1417 1418 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1419 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1420 if (i>=first && i < last) { 1421 PetscScalar *data; 1422 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1423 if (!has_const) { 1424 data[i-first] = 1.; 1425 } else { 1426 data[2*i-first] = 1./PetscSqrtReal(2.); 1427 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1428 } 1429 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1430 } 1431 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1432 } 1433 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1434 for (i=0;i<nvecs;i++) { /* reset vectors */ 1435 PetscInt first,last; 1436 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1437 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1438 if (i>=first && i < last) { 1439 PetscScalar *data; 1440 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1441 if (!has_const) { 1442 data[i-first] = 0.; 1443 } else { 1444 data[2*i-first] = 0.; 1445 data[2*i-first+1] = 0.; 1446 } 1447 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1448 } 1449 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1450 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1456 { 1457 Mat loc_divudotp; 1458 Vec p,v,vins,quad_vec,*quad_vecs; 1459 ISLocalToGlobalMapping map; 1460 PetscScalar *vals; 1461 const PetscScalar *array; 1462 PetscInt i,maxneighs,maxsize; 1463 PetscInt n_neigh,*neigh,*n_shared,**shared; 1464 PetscMPIInt rank; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1469 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1470 if (!maxneighs) { 1471 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1472 *nnsp = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 maxsize = 0; 1476 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1477 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1478 /* create vectors to hold quadrature weights */ 1479 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1480 if (!transpose) { 1481 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1484 } 1485 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1486 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1487 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<maxneighs;i++) { 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1491 } 1492 1493 /* compute local quad vec */ 1494 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1495 if (!transpose) { 1496 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1497 } else { 1498 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1499 } 1500 ierr = VecSet(p,1.);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1511 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1512 ierr = VecScatterCreateWithData(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1513 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1516 } else { 1517 vins = v; 1518 } 1519 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1520 ierr = VecDestroy(&p);CHKERRQ(ierr); 1521 1522 /* insert in global quadrature vecs */ 1523 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1524 for (i=0;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1532 idx = -(idx+1); 1533 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1534 } 1535 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1537 if (vl2l) { 1538 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1539 } 1540 ierr = VecDestroy(&v);CHKERRQ(ierr); 1541 ierr = PetscFree(vals);CHKERRQ(ierr); 1542 1543 /* assemble near null space */ 1544 for (i=0;i<maxneighs;i++) { 1545 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1546 } 1547 for (i=0;i<maxneighs;i++) { 1548 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1549 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1550 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1551 } 1552 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (primalv) { 1563 if (pcbddc->user_primal_vertices_local) { 1564 IS list[2], newp; 1565 1566 list[0] = primalv; 1567 list[1] = pcbddc->user_primal_vertices_local; 1568 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1569 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1570 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1571 pcbddc->user_primal_vertices_local = newp; 1572 } else { 1573 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1574 } 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1580 { 1581 PetscInt f, *comp = (PetscInt *)ctx; 1582 1583 PetscFunctionBegin; 1584 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1589 { 1590 PetscErrorCode ierr; 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1598 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1599 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1602 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1603 if (monolithic) { /* just get block size to properly compute vertices */ 1604 if (pcbddc->vertex_size == 1) { 1605 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1606 } 1607 goto boundary; 1608 } 1609 1610 if (pcbddc->user_provided_isfordofs) { 1611 if (pcbddc->n_ISForDofs) { 1612 PetscInt i; 1613 1614 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1615 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1616 PetscInt bs; 1617 1618 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1619 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1620 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1621 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1622 } 1623 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1624 pcbddc->n_ISForDofs = 0; 1625 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1626 } 1627 } else { 1628 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1629 DM dm; 1630 1631 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1632 if (!dm) { 1633 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1634 } 1635 if (dm) { 1636 IS *fields; 1637 PetscInt nf,i; 1638 1639 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1640 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1641 for (i=0;i<nf;i++) { 1642 PetscInt bs; 1643 1644 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1645 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1646 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1647 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1648 } 1649 ierr = PetscFree(fields);CHKERRQ(ierr); 1650 pcbddc->n_ISForDofsLocal = nf; 1651 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1652 PetscContainer c; 1653 1654 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1655 if (c) { 1656 MatISLocalFields lf; 1657 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1659 } else { /* fallback, create the default fields if bs > 1 */ 1660 PetscInt i, n = matis->A->rmap->n; 1661 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1662 if (i > 1) { 1663 pcbddc->n_ISForDofsLocal = i; 1664 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1665 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1666 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 } 1671 } else { 1672 PetscInt i; 1673 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1674 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1675 } 1676 } 1677 } 1678 1679 boundary: 1680 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1681 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1682 } else if (pcbddc->DirichletBoundariesLocal) { 1683 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1684 } 1685 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1687 } else if (pcbddc->NeumannBoundariesLocal) { 1688 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1689 } 1690 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1692 } 1693 ierr = VecDestroy(&global);CHKERRQ(ierr); 1694 ierr = VecDestroy(&local);CHKERRQ(ierr); 1695 /* detect local disconnected subdomains if requested (use matis->A) */ 1696 if (pcbddc->detect_disconnected) { 1697 IS primalv = NULL; 1698 PetscInt i; 1699 PetscBool filter = pcbddc->detect_disconnected_filter; 1700 1701 for (i=0;i<pcbddc->n_local_subs;i++) { 1702 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1703 } 1704 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1705 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1706 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1707 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1708 } 1709 /* early stage corner detection */ 1710 { 1711 DM dm; 1712 1713 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1714 if (dm) { 1715 PetscBool isda; 1716 1717 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1718 if (isda) { 1719 ISLocalToGlobalMapping l2l; 1720 IS corners; 1721 Mat lA; 1722 1723 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1724 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1725 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1726 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1727 if (l2l && corners) { 1728 const PetscInt *idx; 1729 PetscInt dof,bs,*idxout,n; 1730 1731 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1732 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1733 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1734 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1735 if (bs == dof) { 1736 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1737 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1738 } else { /* the original DMDA local-to-local map have been modified */ 1739 PetscInt i,d; 1740 1741 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1742 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1743 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1744 1745 bs = 1; 1746 n *= dof; 1747 } 1748 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1749 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1750 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1751 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1752 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1753 pcbddc->corner_selected = PETSC_TRUE; 1754 } else if (corners) { /* not from DMDA */ 1755 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1756 } 1757 } 1758 } 1759 } 1760 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1761 DM dm; 1762 1763 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1764 if (!dm) { 1765 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1766 } 1767 if (dm) { 1768 Vec vcoords; 1769 PetscSection section; 1770 PetscReal *coords; 1771 PetscInt d,cdim,nl,nf,**ctxs; 1772 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1773 1774 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1775 ierr = DMGetSection(dm,§ion);CHKERRQ(ierr); 1776 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1777 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1778 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1779 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1780 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1781 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1782 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1783 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1784 for (d=0;d<cdim;d++) { 1785 PetscInt i; 1786 const PetscScalar *v; 1787 1788 for (i=0;i<nf;i++) ctxs[i][0] = d; 1789 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1790 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1791 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1792 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1793 } 1794 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1795 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1796 ierr = PetscFree(coords);CHKERRQ(ierr); 1797 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1798 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1799 } 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1805 { 1806 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1807 PetscErrorCode ierr; 1808 IS nis; 1809 const PetscInt *idxs; 1810 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1811 PetscBool *ld; 1812 1813 PetscFunctionBegin; 1814 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1815 if (mop == MPI_LAND) { 1816 /* init rootdata with true */ 1817 ld = (PetscBool*) matis->sf_rootdata; 1818 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1819 } else { 1820 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1821 } 1822 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1823 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1824 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1825 ld = (PetscBool*) matis->sf_leafdata; 1826 for (i=0;i<nd;i++) 1827 if (-1 < idxs[i] && idxs[i] < n) 1828 ld[idxs[i]] = PETSC_TRUE; 1829 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1830 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1831 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1832 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1833 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1834 if (mop == MPI_LAND) { 1835 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1836 } else { 1837 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1838 } 1839 for (i=0,nnd=0;i<n;i++) 1840 if (ld[i]) 1841 nidxs[nnd++] = i; 1842 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1843 ierr = ISDestroy(is);CHKERRQ(ierr); 1844 *is = nis; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1849 { 1850 PC_IS *pcis = (PC_IS*)(pc->data); 1851 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1852 PetscErrorCode ierr; 1853 1854 PetscFunctionBegin; 1855 if (!pcbddc->benign_have_null) { 1856 PetscFunctionReturn(0); 1857 } 1858 if (pcbddc->ChangeOfBasisMatrix) { 1859 Vec swap; 1860 1861 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1862 swap = pcbddc->work_change; 1863 pcbddc->work_change = r; 1864 r = swap; 1865 } 1866 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1869 ierr = VecSet(z,0.);CHKERRQ(ierr); 1870 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1871 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1872 if (pcbddc->ChangeOfBasisMatrix) { 1873 pcbddc->work_change = r; 1874 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1875 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1876 } 1877 PetscFunctionReturn(0); 1878 } 1879 1880 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1881 { 1882 PCBDDCBenignMatMult_ctx ctx; 1883 PetscErrorCode ierr; 1884 PetscBool apply_right,apply_left,reset_x; 1885 1886 PetscFunctionBegin; 1887 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1888 if (transpose) { 1889 apply_right = ctx->apply_left; 1890 apply_left = ctx->apply_right; 1891 } else { 1892 apply_right = ctx->apply_right; 1893 apply_left = ctx->apply_left; 1894 } 1895 reset_x = PETSC_FALSE; 1896 if (apply_right) { 1897 const PetscScalar *ax; 1898 PetscInt nl,i; 1899 1900 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1901 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1902 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1903 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1904 for (i=0;i<ctx->benign_n;i++) { 1905 PetscScalar sum,val; 1906 const PetscInt *idxs; 1907 PetscInt nz,j; 1908 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1909 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1910 sum = 0.; 1911 if (ctx->apply_p0) { 1912 val = ctx->work[idxs[nz-1]]; 1913 for (j=0;j<nz-1;j++) { 1914 sum += ctx->work[idxs[j]]; 1915 ctx->work[idxs[j]] += val; 1916 } 1917 } else { 1918 for (j=0;j<nz-1;j++) { 1919 sum += ctx->work[idxs[j]]; 1920 } 1921 } 1922 ctx->work[idxs[nz-1]] -= sum; 1923 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1924 } 1925 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1926 reset_x = PETSC_TRUE; 1927 } 1928 if (transpose) { 1929 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1930 } else { 1931 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1932 } 1933 if (reset_x) { 1934 ierr = VecResetArray(x);CHKERRQ(ierr); 1935 } 1936 if (apply_left) { 1937 PetscScalar *ay; 1938 PetscInt i; 1939 1940 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1941 for (i=0;i<ctx->benign_n;i++) { 1942 PetscScalar sum,val; 1943 const PetscInt *idxs; 1944 PetscInt nz,j; 1945 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1946 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1947 val = -ay[idxs[nz-1]]; 1948 if (ctx->apply_p0) { 1949 sum = 0.; 1950 for (j=0;j<nz-1;j++) { 1951 sum += ay[idxs[j]]; 1952 ay[idxs[j]] += val; 1953 } 1954 ay[idxs[nz-1]] += sum; 1955 } else { 1956 for (j=0;j<nz-1;j++) { 1957 ay[idxs[j]] += val; 1958 } 1959 ay[idxs[nz-1]] = 0.; 1960 } 1961 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1962 } 1963 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1964 } 1965 PetscFunctionReturn(0); 1966 } 1967 1968 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1969 { 1970 PetscErrorCode ierr; 1971 1972 PetscFunctionBegin; 1973 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1974 PetscFunctionReturn(0); 1975 } 1976 1977 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1978 { 1979 PetscErrorCode ierr; 1980 1981 PetscFunctionBegin; 1982 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1983 PetscFunctionReturn(0); 1984 } 1985 1986 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1987 { 1988 PC_IS *pcis = (PC_IS*)pc->data; 1989 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1990 PCBDDCBenignMatMult_ctx ctx; 1991 PetscErrorCode ierr; 1992 1993 PetscFunctionBegin; 1994 if (!restore) { 1995 Mat A_IB,A_BI; 1996 PetscScalar *work; 1997 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1998 1999 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2000 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2001 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2002 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2003 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2004 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2005 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2006 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2007 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2008 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2009 ctx->apply_left = PETSC_TRUE; 2010 ctx->apply_right = PETSC_FALSE; 2011 ctx->apply_p0 = PETSC_FALSE; 2012 ctx->benign_n = pcbddc->benign_n; 2013 if (reuse) { 2014 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2015 ctx->free = PETSC_FALSE; 2016 } else { /* TODO: could be optimized for successive solves */ 2017 ISLocalToGlobalMapping N_to_D; 2018 PetscInt i; 2019 2020 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2021 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2022 for (i=0;i<pcbddc->benign_n;i++) { 2023 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2024 } 2025 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2026 ctx->free = PETSC_TRUE; 2027 } 2028 ctx->A = pcis->A_IB; 2029 ctx->work = work; 2030 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2031 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2032 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 pcis->A_IB = A_IB; 2034 2035 /* A_BI as A_IB^T */ 2036 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2037 pcbddc->benign_original_mat = pcis->A_BI; 2038 pcis->A_BI = A_BI; 2039 } else { 2040 if (!pcbddc->benign_original_mat) { 2041 PetscFunctionReturn(0); 2042 } 2043 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2044 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2045 pcis->A_IB = ctx->A; 2046 ctx->A = NULL; 2047 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2048 pcis->A_BI = pcbddc->benign_original_mat; 2049 pcbddc->benign_original_mat = NULL; 2050 if (ctx->free) { 2051 PetscInt i; 2052 for (i=0;i<ctx->benign_n;i++) { 2053 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2054 } 2055 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2056 } 2057 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2058 ierr = PetscFree(ctx);CHKERRQ(ierr); 2059 } 2060 PetscFunctionReturn(0); 2061 } 2062 2063 /* used just in bddc debug mode */ 2064 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2065 { 2066 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2067 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2068 Mat An; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2073 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2074 if (is1) { 2075 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2076 ierr = MatDestroy(&An);CHKERRQ(ierr); 2077 } else { 2078 *B = An; 2079 } 2080 PetscFunctionReturn(0); 2081 } 2082 2083 /* TODO: add reuse flag */ 2084 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2085 { 2086 Mat Bt; 2087 PetscScalar *a,*bdata; 2088 const PetscInt *ii,*ij; 2089 PetscInt m,n,i,nnz,*bii,*bij; 2090 PetscBool flg_row; 2091 PetscErrorCode ierr; 2092 2093 PetscFunctionBegin; 2094 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2095 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2096 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2097 nnz = n; 2098 for (i=0;i<ii[n];i++) { 2099 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2100 } 2101 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2102 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2103 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2104 nnz = 0; 2105 bii[0] = 0; 2106 for (i=0;i<n;i++) { 2107 PetscInt j; 2108 for (j=ii[i];j<ii[i+1];j++) { 2109 PetscScalar entry = a[j]; 2110 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2111 bij[nnz] = ij[j]; 2112 bdata[nnz] = entry; 2113 nnz++; 2114 } 2115 } 2116 bii[i+1] = nnz; 2117 } 2118 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2119 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2120 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2121 { 2122 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2123 b->free_a = PETSC_TRUE; 2124 b->free_ij = PETSC_TRUE; 2125 } 2126 if (*B == A) { 2127 ierr = MatDestroy(&A);CHKERRQ(ierr); 2128 } 2129 *B = Bt; 2130 PetscFunctionReturn(0); 2131 } 2132 2133 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2134 { 2135 Mat B = NULL; 2136 DM dm; 2137 IS is_dummy,*cc_n; 2138 ISLocalToGlobalMapping l2gmap_dummy; 2139 PCBDDCGraph graph; 2140 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2141 PetscInt i,n; 2142 PetscInt *xadj,*adjncy; 2143 PetscBool isplex = PETSC_FALSE; 2144 PetscErrorCode ierr; 2145 2146 PetscFunctionBegin; 2147 if (ncc) *ncc = 0; 2148 if (cc) *cc = NULL; 2149 if (primalv) *primalv = NULL; 2150 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2151 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2152 if (!dm) { 2153 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2154 } 2155 if (dm) { 2156 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2157 } 2158 if (filter) isplex = PETSC_FALSE; 2159 2160 if (isplex) { /* this code has been modified from plexpartition.c */ 2161 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2162 PetscInt *adj = NULL; 2163 IS cellNumbering; 2164 const PetscInt *cellNum; 2165 PetscBool useCone, useClosure; 2166 PetscSection section; 2167 PetscSegBuffer adjBuffer; 2168 PetscSF sfPoint; 2169 PetscErrorCode ierr; 2170 2171 PetscFunctionBegin; 2172 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2173 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2174 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2175 /* Build adjacency graph via a section/segbuffer */ 2176 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2177 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2178 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2179 /* Always use FVM adjacency to create partitioner graph */ 2180 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2181 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2182 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2183 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2184 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2185 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2186 for (n = 0, p = pStart; p < pEnd; p++) { 2187 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2188 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2189 adjSize = PETSC_DETERMINE; 2190 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2191 for (a = 0; a < adjSize; ++a) { 2192 const PetscInt point = adj[a]; 2193 if (pStart <= point && point < pEnd) { 2194 PetscInt *PETSC_RESTRICT pBuf; 2195 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2196 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2197 *pBuf = point; 2198 } 2199 } 2200 n++; 2201 } 2202 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2203 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2204 /* Derive CSR graph from section/segbuffer */ 2205 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2206 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2207 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2208 for (idx = 0, p = pStart; p < pEnd; p++) { 2209 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2210 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2211 } 2212 xadj[n] = size; 2213 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2214 /* Clean up */ 2215 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2216 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2217 ierr = PetscFree(adj);CHKERRQ(ierr); 2218 graph->xadj = xadj; 2219 graph->adjncy = adjncy; 2220 } else { 2221 Mat A; 2222 PetscBool isseqaij, flg_row; 2223 2224 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2225 if (!A->rmap->N || !A->cmap->N) { 2226 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2227 PetscFunctionReturn(0); 2228 } 2229 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2230 if (!isseqaij && filter) { 2231 PetscBool isseqdense; 2232 2233 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2234 if (!isseqdense) { 2235 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2236 } else { /* TODO: rectangular case and LDA */ 2237 PetscScalar *array; 2238 PetscReal chop=1.e-6; 2239 2240 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2241 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2242 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2243 for (i=0;i<n;i++) { 2244 PetscInt j; 2245 for (j=i+1;j<n;j++) { 2246 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2247 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2248 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2249 } 2250 } 2251 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2252 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2253 } 2254 } else { 2255 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2256 B = A; 2257 } 2258 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2259 2260 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2261 if (filter) { 2262 PetscScalar *data; 2263 PetscInt j,cum; 2264 2265 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2266 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2267 cum = 0; 2268 for (i=0;i<n;i++) { 2269 PetscInt t; 2270 2271 for (j=xadj[i];j<xadj[i+1];j++) { 2272 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2273 continue; 2274 } 2275 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2276 } 2277 t = xadj_filtered[i]; 2278 xadj_filtered[i] = cum; 2279 cum += t; 2280 } 2281 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2282 graph->xadj = xadj_filtered; 2283 graph->adjncy = adjncy_filtered; 2284 } else { 2285 graph->xadj = xadj; 2286 graph->adjncy = adjncy; 2287 } 2288 } 2289 /* compute local connected components using PCBDDCGraph */ 2290 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2291 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2292 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2293 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2294 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2295 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2296 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2297 2298 /* partial clean up */ 2299 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2300 if (B) { 2301 PetscBool flg_row; 2302 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2303 ierr = MatDestroy(&B);CHKERRQ(ierr); 2304 } 2305 if (isplex) { 2306 ierr = PetscFree(xadj);CHKERRQ(ierr); 2307 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2308 } 2309 2310 /* get back data */ 2311 if (isplex) { 2312 if (ncc) *ncc = graph->ncc; 2313 if (cc || primalv) { 2314 Mat A; 2315 PetscBT btv,btvt; 2316 PetscSection subSection; 2317 PetscInt *ids,cum,cump,*cids,*pids; 2318 2319 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2320 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2321 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2322 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2323 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2324 2325 cids[0] = 0; 2326 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2327 PetscInt j; 2328 2329 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2330 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2331 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2332 2333 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2334 for (k = 0; k < 2*size; k += 2) { 2335 PetscInt s, p = closure[k], off, dof, cdof; 2336 2337 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2338 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2339 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2340 for (s = 0; s < dof-cdof; s++) { 2341 if (PetscBTLookupSet(btvt,off+s)) continue; 2342 if (!PetscBTLookup(btv,off+s)) { 2343 ids[cum++] = off+s; 2344 } else { /* cross-vertex */ 2345 pids[cump++] = off+s; 2346 } 2347 } 2348 } 2349 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2350 } 2351 cids[i+1] = cum; 2352 /* mark dofs as already assigned */ 2353 for (j = cids[i]; j < cids[i+1]; j++) { 2354 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2355 } 2356 } 2357 if (cc) { 2358 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2359 for (i = 0; i < graph->ncc; i++) { 2360 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2361 } 2362 *cc = cc_n; 2363 } 2364 if (primalv) { 2365 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2366 } 2367 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2368 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2369 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2370 } 2371 } else { 2372 if (ncc) *ncc = graph->ncc; 2373 if (cc) { 2374 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2375 for (i=0;i<graph->ncc;i++) { 2376 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2377 } 2378 *cc = cc_n; 2379 } 2380 } 2381 /* clean up graph */ 2382 graph->xadj = 0; 2383 graph->adjncy = 0; 2384 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2385 PetscFunctionReturn(0); 2386 } 2387 2388 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2389 { 2390 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2391 PC_IS* pcis = (PC_IS*)(pc->data); 2392 IS dirIS = NULL; 2393 PetscInt i; 2394 PetscErrorCode ierr; 2395 2396 PetscFunctionBegin; 2397 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2398 if (zerodiag) { 2399 Mat A; 2400 Vec vec3_N; 2401 PetscScalar *vals; 2402 const PetscInt *idxs; 2403 PetscInt nz,*count; 2404 2405 /* p0 */ 2406 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2407 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2408 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2409 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2410 for (i=0;i<nz;i++) vals[i] = 1.; 2411 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2412 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2413 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2414 /* v_I */ 2415 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2416 for (i=0;i<nz;i++) vals[i] = 0.; 2417 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2418 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2419 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2420 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2421 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2422 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2423 if (dirIS) { 2424 PetscInt n; 2425 2426 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2427 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2428 for (i=0;i<n;i++) vals[i] = 0.; 2429 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2430 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2431 } 2432 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2433 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2434 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2435 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2436 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2437 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2438 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2439 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2440 ierr = PetscFree(vals);CHKERRQ(ierr); 2441 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2442 2443 /* there should not be any pressure dofs lying on the interface */ 2444 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2445 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2446 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2447 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2450 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2451 ierr = PetscFree(count);CHKERRQ(ierr); 2452 } 2453 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2454 2455 /* check PCBDDCBenignGetOrSetP0 */ 2456 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2457 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2458 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2460 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->benign_n;i++) { 2462 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2463 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2464 } 2465 PetscFunctionReturn(0); 2466 } 2467 2468 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2469 { 2470 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2471 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2472 PetscInt nz,n; 2473 PetscInt *interior_dofs,n_interior_dofs,nneu; 2474 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2475 PetscErrorCode ierr; 2476 2477 PetscFunctionBegin; 2478 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2479 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2480 for (n=0;n<pcbddc->benign_n;n++) { 2481 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2482 } 2483 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2484 pcbddc->benign_n = 0; 2485 2486 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2487 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2488 Checks if all the pressure dofs in each subdomain have a zero diagonal 2489 If not, a change of basis on pressures is not needed 2490 since the local Schur complements are already SPD 2491 */ 2492 has_null_pressures = PETSC_TRUE; 2493 have_null = PETSC_TRUE; 2494 if (pcbddc->n_ISForDofsLocal) { 2495 IS iP = NULL; 2496 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2497 2498 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2499 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2500 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2501 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2502 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2503 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2504 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2505 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2506 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2507 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2508 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2509 if (iP) { 2510 IS newpressures; 2511 2512 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2513 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2514 pressures = newpressures; 2515 } 2516 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2517 if (!sorted) { 2518 ierr = ISSort(pressures);CHKERRQ(ierr); 2519 } 2520 } else { 2521 pressures = NULL; 2522 } 2523 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2524 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2525 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2526 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2527 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2528 if (!sorted) { 2529 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2530 } 2531 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2532 zerodiag_save = zerodiag; 2533 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2534 if (!nz) { 2535 if (n) have_null = PETSC_FALSE; 2536 has_null_pressures = PETSC_FALSE; 2537 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2538 } 2539 recompute_zerodiag = PETSC_FALSE; 2540 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2541 zerodiag_subs = NULL; 2542 pcbddc->benign_n = 0; 2543 n_interior_dofs = 0; 2544 interior_dofs = NULL; 2545 nneu = 0; 2546 if (pcbddc->NeumannBoundariesLocal) { 2547 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2548 } 2549 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2550 if (checkb) { /* need to compute interior nodes */ 2551 PetscInt n,i,j; 2552 PetscInt n_neigh,*neigh,*n_shared,**shared; 2553 PetscInt *iwork; 2554 2555 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2556 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2557 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2558 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2559 for (i=1;i<n_neigh;i++) 2560 for (j=0;j<n_shared[i];j++) 2561 iwork[shared[i][j]] += 1; 2562 for (i=0;i<n;i++) 2563 if (!iwork[i]) 2564 interior_dofs[n_interior_dofs++] = i; 2565 ierr = PetscFree(iwork);CHKERRQ(ierr); 2566 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2567 } 2568 if (has_null_pressures) { 2569 IS *subs; 2570 PetscInt nsubs,i,j,nl; 2571 const PetscInt *idxs; 2572 PetscScalar *array; 2573 Vec *work; 2574 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2575 2576 subs = pcbddc->local_subs; 2577 nsubs = pcbddc->n_local_subs; 2578 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2579 if (checkb) { 2580 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2581 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2582 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2583 /* work[0] = 1_p */ 2584 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2585 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2586 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2587 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2588 /* work[0] = 1_v */ 2589 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2590 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2591 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2592 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2593 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2594 } 2595 if (nsubs > 1) { 2596 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2597 for (i=0;i<nsubs;i++) { 2598 ISLocalToGlobalMapping l2g; 2599 IS t_zerodiag_subs; 2600 PetscInt nl; 2601 2602 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2603 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2604 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2605 if (nl) { 2606 PetscBool valid = PETSC_TRUE; 2607 2608 if (checkb) { 2609 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2610 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2611 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2612 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2613 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2614 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2615 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2616 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2617 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2618 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2619 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2620 for (j=0;j<n_interior_dofs;j++) { 2621 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2622 valid = PETSC_FALSE; 2623 break; 2624 } 2625 } 2626 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2627 } 2628 if (valid && nneu) { 2629 const PetscInt *idxs; 2630 PetscInt nzb; 2631 2632 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2633 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2634 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2635 if (nzb) valid = PETSC_FALSE; 2636 } 2637 if (valid && pressures) { 2638 IS t_pressure_subs; 2639 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2640 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2641 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2642 } 2643 if (valid) { 2644 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2645 pcbddc->benign_n++; 2646 } else { 2647 recompute_zerodiag = PETSC_TRUE; 2648 } 2649 } 2650 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2651 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2652 } 2653 } else { /* there's just one subdomain (or zero if they have not been detected */ 2654 PetscBool valid = PETSC_TRUE; 2655 2656 if (nneu) valid = PETSC_FALSE; 2657 if (valid && pressures) { 2658 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2659 } 2660 if (valid && checkb) { 2661 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2662 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2663 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2664 for (j=0;j<n_interior_dofs;j++) { 2665 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2666 valid = PETSC_FALSE; 2667 break; 2668 } 2669 } 2670 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2671 } 2672 if (valid) { 2673 pcbddc->benign_n = 1; 2674 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2675 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2676 zerodiag_subs[0] = zerodiag; 2677 } 2678 } 2679 if (checkb) { 2680 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2681 } 2682 } 2683 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2684 2685 if (!pcbddc->benign_n) { 2686 PetscInt n; 2687 2688 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2689 recompute_zerodiag = PETSC_FALSE; 2690 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2691 if (n) { 2692 has_null_pressures = PETSC_FALSE; 2693 have_null = PETSC_FALSE; 2694 } 2695 } 2696 2697 /* final check for null pressures */ 2698 if (zerodiag && pressures) { 2699 PetscInt nz,np; 2700 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2701 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2702 if (nz != np) have_null = PETSC_FALSE; 2703 } 2704 2705 if (recompute_zerodiag) { 2706 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2707 if (pcbddc->benign_n == 1) { 2708 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2709 zerodiag = zerodiag_subs[0]; 2710 } else { 2711 PetscInt i,nzn,*new_idxs; 2712 2713 nzn = 0; 2714 for (i=0;i<pcbddc->benign_n;i++) { 2715 PetscInt ns; 2716 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2717 nzn += ns; 2718 } 2719 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2720 nzn = 0; 2721 for (i=0;i<pcbddc->benign_n;i++) { 2722 PetscInt ns,*idxs; 2723 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2724 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2725 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2726 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2727 nzn += ns; 2728 } 2729 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2730 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2731 } 2732 have_null = PETSC_FALSE; 2733 } 2734 2735 /* Prepare matrix to compute no-net-flux */ 2736 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2737 Mat A,loc_divudotp; 2738 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2739 IS row,col,isused = NULL; 2740 PetscInt M,N,n,st,n_isused; 2741 2742 if (pressures) { 2743 isused = pressures; 2744 } else { 2745 isused = zerodiag_save; 2746 } 2747 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2748 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2749 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2750 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2751 n_isused = 0; 2752 if (isused) { 2753 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2754 } 2755 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2756 st = st-n_isused; 2757 if (n) { 2758 const PetscInt *gidxs; 2759 2760 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2761 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2762 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2763 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2764 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2765 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2766 } else { 2767 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2768 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2769 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2770 } 2771 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2772 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2773 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2774 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2775 ierr = ISDestroy(&row);CHKERRQ(ierr); 2776 ierr = ISDestroy(&col);CHKERRQ(ierr); 2777 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2778 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2779 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2780 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2781 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2782 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2783 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2784 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2785 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2786 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 } 2788 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2789 2790 /* change of basis and p0 dofs */ 2791 if (has_null_pressures) { 2792 IS zerodiagc; 2793 const PetscInt *idxs,*idxsc; 2794 PetscInt i,s,*nnz; 2795 2796 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2797 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2798 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2799 /* local change of basis for pressures */ 2800 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2801 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2802 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2803 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2804 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2805 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2806 for (i=0;i<pcbddc->benign_n;i++) { 2807 PetscInt nzs,j; 2808 2809 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2810 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2811 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2812 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2813 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2814 } 2815 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2816 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2817 ierr = PetscFree(nnz);CHKERRQ(ierr); 2818 /* set identity on velocities */ 2819 for (i=0;i<n-nz;i++) { 2820 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2821 } 2822 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2823 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2824 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2825 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2826 /* set change on pressures */ 2827 for (s=0;s<pcbddc->benign_n;s++) { 2828 PetscScalar *array; 2829 PetscInt nzs; 2830 2831 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2832 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2833 for (i=0;i<nzs-1;i++) { 2834 PetscScalar vals[2]; 2835 PetscInt cols[2]; 2836 2837 cols[0] = idxs[i]; 2838 cols[1] = idxs[nzs-1]; 2839 vals[0] = 1.; 2840 vals[1] = 1.; 2841 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 } 2843 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2844 for (i=0;i<nzs-1;i++) array[i] = -1.; 2845 array[nzs-1] = 1.; 2846 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2847 /* store local idxs for p0 */ 2848 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2849 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2850 ierr = PetscFree(array);CHKERRQ(ierr); 2851 } 2852 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 /* project if needed */ 2855 if (pcbddc->benign_change_explicit) { 2856 Mat M; 2857 2858 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2859 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2860 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatDestroy(&M);CHKERRQ(ierr); 2862 } 2863 /* store global idxs for p0 */ 2864 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2865 } 2866 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2867 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2868 2869 /* determines if the coarse solver will be singular or not */ 2870 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2871 /* determines if the problem has subdomains with 0 pressure block */ 2872 have_null = (PetscBool)(!!pcbddc->benign_n); 2873 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 *zerodiaglocal = zerodiag; 2875 PetscFunctionReturn(0); 2876 } 2877 2878 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2879 { 2880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2881 PetscScalar *array; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 if (!pcbddc->benign_sf) { 2886 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2887 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2888 } 2889 if (get) { 2890 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2891 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2893 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2894 } else { 2895 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2896 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2904 { 2905 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 /* TODO: add error checking 2910 - avoid nested pop (or push) calls. 2911 - cannot push before pop. 2912 - cannot call this if pcbddc->local_mat is NULL 2913 */ 2914 if (!pcbddc->benign_n) { 2915 PetscFunctionReturn(0); 2916 } 2917 if (pop) { 2918 if (pcbddc->benign_change_explicit) { 2919 IS is_p0; 2920 MatReuse reuse; 2921 2922 /* extract B_0 */ 2923 reuse = MAT_INITIAL_MATRIX; 2924 if (pcbddc->benign_B0) { 2925 reuse = MAT_REUSE_MATRIX; 2926 } 2927 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2928 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2929 /* remove rows and cols from local problem */ 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2931 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2932 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2934 } else { 2935 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2936 PetscScalar *vals; 2937 PetscInt i,n,*idxs_ins; 2938 2939 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2940 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2941 if (!pcbddc->benign_B0) { 2942 PetscInt *nnz; 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2944 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->benign_n;i++) { 2948 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2949 nnz[i] = n - nnz[i]; 2950 } 2951 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2952 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2953 ierr = PetscFree(nnz);CHKERRQ(ierr); 2954 } 2955 2956 for (i=0;i<pcbddc->benign_n;i++) { 2957 PetscScalar *array; 2958 PetscInt *idxs,j,nz,cum; 2959 2960 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2961 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2962 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2963 for (j=0;j<nz;j++) vals[j] = 1.; 2964 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2965 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2966 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2967 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2968 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2969 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2970 cum = 0; 2971 for (j=0;j<n;j++) { 2972 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2973 vals[cum] = array[j]; 2974 idxs_ins[cum] = j; 2975 cum++; 2976 } 2977 } 2978 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2979 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2980 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2981 } 2982 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2984 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2985 } 2986 } else { /* push */ 2987 if (pcbddc->benign_change_explicit) { 2988 PetscInt i; 2989 2990 for (i=0;i<pcbddc->benign_n;i++) { 2991 PetscScalar *B0_vals; 2992 PetscInt *B0_cols,B0_ncol; 2993 2994 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2995 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2997 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2998 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2999 } 3000 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3002 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3003 } 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3008 { 3009 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3010 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3011 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3012 PetscBLASInt *B_iwork,*B_ifail; 3013 PetscScalar *work,lwork; 3014 PetscScalar *St,*S,*eigv; 3015 PetscScalar *Sarray,*Starray; 3016 PetscReal *eigs,thresh,lthresh,uthresh; 3017 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3018 PetscBool allocated_S_St; 3019 #if defined(PETSC_USE_COMPLEX) 3020 PetscReal *rwork; 3021 #endif 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3026 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3027 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3028 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3029 3030 if (pcbddc->dbg_flag) { 3031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3034 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 } 3036 3037 if (pcbddc->dbg_flag) { 3038 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3039 } 3040 3041 /* max size of subsets */ 3042 mss = 0; 3043 for (i=0;i<sub_schurs->n_subs;i++) { 3044 PetscInt subset_size; 3045 3046 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3047 mss = PetscMax(mss,subset_size); 3048 } 3049 3050 /* min/max and threshold */ 3051 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3052 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3053 nmax = PetscMax(nmin,nmax); 3054 allocated_S_St = PETSC_FALSE; 3055 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3056 allocated_S_St = PETSC_TRUE; 3057 } 3058 3059 /* allocate lapack workspace */ 3060 cum = cum2 = 0; 3061 maxneigs = 0; 3062 for (i=0;i<sub_schurs->n_subs;i++) { 3063 PetscInt n,subset_size; 3064 3065 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3066 n = PetscMin(subset_size,nmax); 3067 cum += subset_size; 3068 cum2 += subset_size*n; 3069 maxneigs = PetscMax(maxneigs,n); 3070 } 3071 if (mss) { 3072 if (sub_schurs->is_symmetric) { 3073 PetscBLASInt B_itype = 1; 3074 PetscBLASInt B_N = mss; 3075 PetscReal zero = 0.0; 3076 PetscReal eps = 0.0; /* dlamch? */ 3077 3078 B_lwork = -1; 3079 S = NULL; 3080 St = NULL; 3081 eigs = NULL; 3082 eigv = NULL; 3083 B_iwork = NULL; 3084 B_ifail = NULL; 3085 #if defined(PETSC_USE_COMPLEX) 3086 rwork = NULL; 3087 #endif 3088 thresh = 1.0; 3089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3092 #else 3093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3094 #endif 3095 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3097 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3098 } else { 3099 lwork = 0; 3100 } 3101 3102 nv = 0; 3103 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3104 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3105 } 3106 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3107 if (allocated_S_St) { 3108 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3109 } 3110 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3111 #if defined(PETSC_USE_COMPLEX) 3112 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3113 #endif 3114 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3115 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3116 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3117 nv+cum,&pcbddc->adaptive_constraints_idxs, 3118 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3119 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3120 3121 maxneigs = 0; 3122 cum = cumarray = 0; 3123 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3124 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3125 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3126 const PetscInt *idxs; 3127 3128 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3129 for (cum=0;cum<nv;cum++) { 3130 pcbddc->adaptive_constraints_n[cum] = 1; 3131 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3132 pcbddc->adaptive_constraints_data[cum] = 1.0; 3133 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3134 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3135 } 3136 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3137 } 3138 3139 if (mss) { /* multilevel */ 3140 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3142 } 3143 3144 lthresh = pcbddc->adaptive_threshold[0]; 3145 uthresh = pcbddc->adaptive_threshold[1]; 3146 for (i=0;i<sub_schurs->n_subs;i++) { 3147 const PetscInt *idxs; 3148 PetscReal upper,lower; 3149 PetscInt j,subset_size,eigs_start = 0; 3150 PetscBLASInt B_N; 3151 PetscBool same_data = PETSC_FALSE; 3152 PetscBool scal = PETSC_FALSE; 3153 3154 if (pcbddc->use_deluxe_scaling) { 3155 upper = PETSC_MAX_REAL; 3156 lower = uthresh; 3157 } else { 3158 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3159 upper = 1./uthresh; 3160 lower = 0.; 3161 } 3162 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3163 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3164 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3165 /* this is experimental: we assume the dofs have been properly grouped to have 3166 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3167 if (!sub_schurs->is_posdef) { 3168 Mat T; 3169 3170 for (j=0;j<subset_size;j++) { 3171 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3172 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3173 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3174 ierr = MatDestroy(&T);CHKERRQ(ierr); 3175 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3176 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3177 ierr = MatDestroy(&T);CHKERRQ(ierr); 3178 if (sub_schurs->change_primal_sub) { 3179 PetscInt nz,k; 3180 const PetscInt *idxs; 3181 3182 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3183 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3184 for (k=0;k<nz;k++) { 3185 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3186 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3187 } 3188 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3189 } 3190 scal = PETSC_TRUE; 3191 break; 3192 } 3193 } 3194 } 3195 3196 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3197 if (sub_schurs->is_symmetric) { 3198 PetscInt j,k; 3199 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3200 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3201 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3202 } 3203 for (j=0;j<subset_size;j++) { 3204 for (k=j;k<subset_size;k++) { 3205 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3206 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3207 } 3208 } 3209 } else { 3210 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3211 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3212 } 3213 } else { 3214 S = Sarray + cumarray; 3215 St = Starray + cumarray; 3216 } 3217 /* see if we can save some work */ 3218 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3219 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3220 } 3221 3222 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3223 B_neigs = 0; 3224 } else { 3225 if (sub_schurs->is_symmetric) { 3226 PetscBLASInt B_itype = 1; 3227 PetscBLASInt B_IL, B_IU; 3228 PetscReal eps = -1.0; /* dlamch? */ 3229 PetscInt nmin_s; 3230 PetscBool compute_range; 3231 3232 B_neigs = 0; 3233 compute_range = (PetscBool)!same_data; 3234 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3235 3236 if (pcbddc->dbg_flag) { 3237 PetscInt nc = 0; 3238 3239 if (sub_schurs->change_primal_sub) { 3240 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3241 } 3242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3243 } 3244 3245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3246 if (compute_range) { 3247 3248 /* ask for eigenvalues larger than thresh */ 3249 if (sub_schurs->is_posdef) { 3250 #if defined(PETSC_USE_COMPLEX) 3251 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3252 #else 3253 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3254 #endif 3255 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3256 } else { /* no theory so far, but it works nicely */ 3257 PetscInt recipe = 0,recipe_m = 1; 3258 PetscReal bb[2]; 3259 3260 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3261 switch (recipe) { 3262 case 0: 3263 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3264 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3265 #if defined(PETSC_USE_COMPLEX) 3266 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3267 #else 3268 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3269 #endif 3270 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3271 break; 3272 case 1: 3273 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3274 #if defined(PETSC_USE_COMPLEX) 3275 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3276 #else 3277 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3278 #endif 3279 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3280 if (!scal) { 3281 PetscBLASInt B_neigs2 = 0; 3282 3283 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3284 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3285 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3286 #if defined(PETSC_USE_COMPLEX) 3287 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3288 #else 3289 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3290 #endif 3291 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3292 B_neigs += B_neigs2; 3293 } 3294 break; 3295 case 2: 3296 if (scal) { 3297 bb[0] = PETSC_MIN_REAL; 3298 bb[1] = 0; 3299 #if defined(PETSC_USE_COMPLEX) 3300 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3301 #else 3302 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3303 #endif 3304 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3305 } else { 3306 PetscBLASInt B_neigs2 = 0; 3307 PetscBool import = PETSC_FALSE; 3308 3309 lthresh = PetscMax(lthresh,0.0); 3310 if (lthresh > 0.0) { 3311 bb[0] = PETSC_MIN_REAL; 3312 bb[1] = lthresh*lthresh; 3313 3314 import = PETSC_TRUE; 3315 #if defined(PETSC_USE_COMPLEX) 3316 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3317 #else 3318 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3319 #endif 3320 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3321 } 3322 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3323 bb[1] = PETSC_MAX_REAL; 3324 if (import) { 3325 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3326 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3327 } 3328 #if defined(PETSC_USE_COMPLEX) 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3330 #else 3331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3332 #endif 3333 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3334 B_neigs += B_neigs2; 3335 } 3336 break; 3337 case 3: 3338 if (scal) { 3339 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3340 } else { 3341 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3342 } 3343 if (!scal) { 3344 bb[0] = uthresh; 3345 bb[1] = PETSC_MAX_REAL; 3346 #if defined(PETSC_USE_COMPLEX) 3347 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3348 #else 3349 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3350 #endif 3351 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3352 } 3353 if (recipe_m > 0 && B_N - B_neigs > 0) { 3354 PetscBLASInt B_neigs2 = 0; 3355 3356 B_IL = 1; 3357 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3358 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3359 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3360 #if defined(PETSC_USE_COMPLEX) 3361 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3362 #else 3363 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3364 #endif 3365 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3366 B_neigs += B_neigs2; 3367 } 3368 break; 3369 case 4: 3370 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3371 #if defined(PETSC_USE_COMPLEX) 3372 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3373 #else 3374 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3375 #endif 3376 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3377 { 3378 PetscBLASInt B_neigs2 = 0; 3379 3380 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3381 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3389 B_neigs += B_neigs2; 3390 } 3391 break; 3392 case 5: /* same as before: first compute all eigenvalues, then filter */ 3393 #if defined(PETSC_USE_COMPLEX) 3394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3395 #else 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3397 #endif 3398 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3399 { 3400 PetscInt e,k,ne; 3401 for (e=0,ne=0;e<B_neigs;e++) { 3402 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3403 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3404 eigs[ne] = eigs[e]; 3405 ne++; 3406 } 3407 } 3408 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3409 B_neigs = ne; 3410 } 3411 break; 3412 default: 3413 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3414 break; 3415 } 3416 } 3417 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3418 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3419 B_IL = 1; 3420 #if defined(PETSC_USE_COMPLEX) 3421 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3422 #else 3423 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3424 #endif 3425 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3426 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3427 PetscInt k; 3428 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3429 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3430 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3431 nmin = nmax; 3432 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3433 for (k=0;k<nmax;k++) { 3434 eigs[k] = 1./PETSC_SMALL; 3435 eigv[k*(subset_size+1)] = 1.0; 3436 } 3437 } 3438 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3439 if (B_ierr) { 3440 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3441 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3442 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3443 } 3444 3445 if (B_neigs > nmax) { 3446 if (pcbddc->dbg_flag) { 3447 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3448 } 3449 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3450 B_neigs = nmax; 3451 } 3452 3453 nmin_s = PetscMin(nmin,B_N); 3454 if (B_neigs < nmin_s) { 3455 PetscBLASInt B_neigs2 = 0; 3456 3457 if (pcbddc->use_deluxe_scaling) { 3458 if (scal) { 3459 B_IU = nmin_s; 3460 B_IL = B_neigs + 1; 3461 } else { 3462 B_IL = B_N - nmin_s + 1; 3463 B_IU = B_N - B_neigs; 3464 } 3465 } else { 3466 B_IL = B_neigs + 1; 3467 B_IU = nmin_s; 3468 } 3469 if (pcbddc->dbg_flag) { 3470 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3471 } 3472 if (sub_schurs->is_symmetric) { 3473 PetscInt j,k; 3474 for (j=0;j<subset_size;j++) { 3475 for (k=j;k<subset_size;k++) { 3476 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3477 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3478 } 3479 } 3480 } else { 3481 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3482 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3483 } 3484 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3485 #if defined(PETSC_USE_COMPLEX) 3486 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3487 #else 3488 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3489 #endif 3490 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3491 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3492 B_neigs += B_neigs2; 3493 } 3494 if (B_ierr) { 3495 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3496 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3497 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3498 } 3499 if (pcbddc->dbg_flag) { 3500 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3501 for (j=0;j<B_neigs;j++) { 3502 if (eigs[j] == 0.0) { 3503 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3504 } else { 3505 if (pcbddc->use_deluxe_scaling) { 3506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3507 } else { 3508 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3509 } 3510 } 3511 } 3512 } 3513 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3514 } 3515 /* change the basis back to the original one */ 3516 if (sub_schurs->change) { 3517 Mat change,phi,phit; 3518 3519 if (pcbddc->dbg_flag > 2) { 3520 PetscInt ii; 3521 for (ii=0;ii<B_neigs;ii++) { 3522 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3523 for (j=0;j<B_N;j++) { 3524 #if defined(PETSC_USE_COMPLEX) 3525 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3526 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3528 #else 3529 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3530 #endif 3531 } 3532 } 3533 } 3534 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3535 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3536 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3537 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3538 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3539 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3540 } 3541 maxneigs = PetscMax(B_neigs,maxneigs); 3542 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3543 if (B_neigs) { 3544 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3545 3546 if (pcbddc->dbg_flag > 1) { 3547 PetscInt ii; 3548 for (ii=0;ii<B_neigs;ii++) { 3549 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3550 for (j=0;j<B_N;j++) { 3551 #if defined(PETSC_USE_COMPLEX) 3552 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3553 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3555 #else 3556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3557 #endif 3558 } 3559 } 3560 } 3561 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3562 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3563 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3564 cum++; 3565 } 3566 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3567 /* shift for next computation */ 3568 cumarray += subset_size*subset_size; 3569 } 3570 if (pcbddc->dbg_flag) { 3571 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3572 } 3573 3574 if (mss) { 3575 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3576 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3577 /* destroy matrices (junk) */ 3578 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3579 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3580 } 3581 if (allocated_S_St) { 3582 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3583 } 3584 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3585 #if defined(PETSC_USE_COMPLEX) 3586 ierr = PetscFree(rwork);CHKERRQ(ierr); 3587 #endif 3588 if (pcbddc->dbg_flag) { 3589 PetscInt maxneigs_r; 3590 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3591 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3592 } 3593 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3594 PetscFunctionReturn(0); 3595 } 3596 3597 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3598 { 3599 PetscScalar *coarse_submat_vals; 3600 PetscErrorCode ierr; 3601 3602 PetscFunctionBegin; 3603 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3604 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3605 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3606 3607 /* Setup local neumann solver ksp_R */ 3608 /* PCBDDCSetUpLocalScatters should be called first! */ 3609 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3610 3611 /* 3612 Setup local correction and local part of coarse basis. 3613 Gives back the dense local part of the coarse matrix in column major ordering 3614 */ 3615 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3616 3617 /* Compute total number of coarse nodes and setup coarse solver */ 3618 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3619 3620 /* free */ 3621 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3622 PetscFunctionReturn(0); 3623 } 3624 3625 PetscErrorCode PCBDDCResetCustomization(PC pc) 3626 { 3627 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3628 PetscErrorCode ierr; 3629 3630 PetscFunctionBegin; 3631 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3632 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3633 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3634 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3635 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3636 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3637 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3638 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3639 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3640 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3641 PetscFunctionReturn(0); 3642 } 3643 3644 PetscErrorCode PCBDDCResetTopography(PC pc) 3645 { 3646 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3647 PetscInt i; 3648 PetscErrorCode ierr; 3649 3650 PetscFunctionBegin; 3651 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3652 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3653 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3654 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3655 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3656 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3657 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3658 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3659 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3660 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3661 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3662 for (i=0;i<pcbddc->n_local_subs;i++) { 3663 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3664 } 3665 pcbddc->n_local_subs = 0; 3666 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3667 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3668 pcbddc->graphanalyzed = PETSC_FALSE; 3669 pcbddc->recompute_topography = PETSC_TRUE; 3670 pcbddc->corner_selected = PETSC_FALSE; 3671 PetscFunctionReturn(0); 3672 } 3673 3674 PetscErrorCode PCBDDCResetSolvers(PC pc) 3675 { 3676 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3677 PetscErrorCode ierr; 3678 3679 PetscFunctionBegin; 3680 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3681 if (pcbddc->coarse_phi_B) { 3682 PetscScalar *array; 3683 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3684 ierr = PetscFree(array);CHKERRQ(ierr); 3685 } 3686 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3687 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3688 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3689 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3690 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3691 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3692 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3693 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3694 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3695 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3696 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3697 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3698 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3699 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3700 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3701 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3702 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3703 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3704 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3705 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3706 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3707 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3708 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3709 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3710 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3711 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3712 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3713 if (pcbddc->benign_zerodiag_subs) { 3714 PetscInt i; 3715 for (i=0;i<pcbddc->benign_n;i++) { 3716 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3717 } 3718 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3719 } 3720 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3721 PetscFunctionReturn(0); 3722 } 3723 3724 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3725 { 3726 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3727 PC_IS *pcis = (PC_IS*)pc->data; 3728 VecType impVecType; 3729 PetscInt n_constraints,n_R,old_size; 3730 PetscErrorCode ierr; 3731 3732 PetscFunctionBegin; 3733 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3734 n_R = pcis->n - pcbddc->n_vertices; 3735 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3736 /* local work vectors (try to avoid unneeded work)*/ 3737 /* R nodes */ 3738 old_size = -1; 3739 if (pcbddc->vec1_R) { 3740 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3741 } 3742 if (n_R != old_size) { 3743 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3744 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3745 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3746 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3747 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3748 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3749 } 3750 /* local primal dofs */ 3751 old_size = -1; 3752 if (pcbddc->vec1_P) { 3753 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3754 } 3755 if (pcbddc->local_primal_size != old_size) { 3756 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3757 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3758 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3759 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3760 } 3761 /* local explicit constraints */ 3762 old_size = -1; 3763 if (pcbddc->vec1_C) { 3764 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3765 } 3766 if (n_constraints && n_constraints != old_size) { 3767 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3768 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3769 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3770 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3771 } 3772 PetscFunctionReturn(0); 3773 } 3774 3775 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3776 { 3777 PetscErrorCode ierr; 3778 /* pointers to pcis and pcbddc */ 3779 PC_IS* pcis = (PC_IS*)pc->data; 3780 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3781 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3782 /* submatrices of local problem */ 3783 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3784 /* submatrices of local coarse problem */ 3785 Mat S_VV,S_CV,S_VC,S_CC; 3786 /* working matrices */ 3787 Mat C_CR; 3788 /* additional working stuff */ 3789 PC pc_R; 3790 Mat F,Brhs = NULL; 3791 Vec dummy_vec; 3792 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3793 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3794 PetscScalar *work; 3795 PetscInt *idx_V_B; 3796 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3797 PetscInt i,n_R,n_D,n_B; 3798 3799 /* some shortcuts to scalars */ 3800 PetscScalar one=1.0,m_one=-1.0; 3801 3802 PetscFunctionBegin; 3803 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3804 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3805 3806 /* Set Non-overlapping dimensions */ 3807 n_vertices = pcbddc->n_vertices; 3808 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3809 n_B = pcis->n_B; 3810 n_D = pcis->n - n_B; 3811 n_R = pcis->n - n_vertices; 3812 3813 /* vertices in boundary numbering */ 3814 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3815 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3816 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3817 3818 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3819 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3821 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3822 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3823 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3824 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3825 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3826 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3827 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3828 3829 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3830 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3831 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3832 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3833 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3834 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3835 lda_rhs = n_R; 3836 need_benign_correction = PETSC_FALSE; 3837 if (isLU || isILU || isCHOL) { 3838 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3839 } else if (sub_schurs && sub_schurs->reuse_solver) { 3840 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3841 MatFactorType type; 3842 3843 F = reuse_solver->F; 3844 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3845 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3846 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3847 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3848 } else { 3849 F = NULL; 3850 } 3851 3852 /* determine if we can use a sparse right-hand side */ 3853 sparserhs = PETSC_FALSE; 3854 if (F) { 3855 MatSolverType solver; 3856 3857 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3858 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3859 } 3860 3861 /* allocate workspace */ 3862 n = 0; 3863 if (n_constraints) { 3864 n += lda_rhs*n_constraints; 3865 } 3866 if (n_vertices) { 3867 n = PetscMax(2*lda_rhs*n_vertices,n); 3868 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3869 } 3870 if (!pcbddc->symmetric_primal) { 3871 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3872 } 3873 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3874 3875 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3876 dummy_vec = NULL; 3877 if (need_benign_correction && lda_rhs != n_R && F) { 3878 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3879 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3880 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3881 } 3882 3883 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3884 if (n_constraints) { 3885 Mat M3,C_B; 3886 IS is_aux; 3887 PetscScalar *array,*array2; 3888 3889 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3890 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3891 3892 /* Extract constraints on R nodes: C_{CR} */ 3893 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3894 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3895 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3896 3897 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3898 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3899 if (!sparserhs) { 3900 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3901 for (i=0;i<n_constraints;i++) { 3902 const PetscScalar *row_cmat_values; 3903 const PetscInt *row_cmat_indices; 3904 PetscInt size_of_constraint,j; 3905 3906 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3907 for (j=0;j<size_of_constraint;j++) { 3908 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3909 } 3910 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3911 } 3912 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3913 } else { 3914 Mat tC_CR; 3915 3916 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3917 if (lda_rhs != n_R) { 3918 PetscScalar *aa; 3919 PetscInt r,*ii,*jj; 3920 PetscBool done; 3921 3922 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3923 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3924 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3925 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3926 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3927 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3928 } else { 3929 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3930 tC_CR = C_CR; 3931 } 3932 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3933 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3934 } 3935 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3936 if (F) { 3937 if (need_benign_correction) { 3938 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3939 3940 /* rhs is already zero on interior dofs, no need to change the rhs */ 3941 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3942 } 3943 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3944 if (need_benign_correction) { 3945 PetscScalar *marr; 3946 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3947 3948 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3949 if (lda_rhs != n_R) { 3950 for (i=0;i<n_constraints;i++) { 3951 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3952 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3953 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3954 } 3955 } else { 3956 for (i=0;i<n_constraints;i++) { 3957 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3958 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3959 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3960 } 3961 } 3962 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3963 } 3964 } else { 3965 PetscScalar *marr; 3966 3967 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3968 for (i=0;i<n_constraints;i++) { 3969 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3970 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3971 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3972 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3973 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3974 } 3975 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3976 } 3977 if (sparserhs) { 3978 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3979 } 3980 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3981 if (!pcbddc->switch_static) { 3982 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3983 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3984 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3985 for (i=0;i<n_constraints;i++) { 3986 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3987 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3988 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3989 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3990 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3991 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3992 } 3993 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3994 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3995 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3996 } else { 3997 if (lda_rhs != n_R) { 3998 IS dummy; 3999 4000 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4001 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4002 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4003 } else { 4004 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4005 pcbddc->local_auxmat2 = local_auxmat2_R; 4006 } 4007 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4008 } 4009 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4010 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4011 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4012 if (isCHOL) { 4013 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4014 } else { 4015 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4016 } 4017 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4018 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4019 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4020 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4021 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4022 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4023 } 4024 4025 /* Get submatrices from subdomain matrix */ 4026 if (n_vertices) { 4027 IS is_aux; 4028 PetscBool isseqaij; 4029 4030 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4031 IS tis; 4032 4033 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4034 ierr = ISSort(tis);CHKERRQ(ierr); 4035 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4036 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4037 } else { 4038 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4039 } 4040 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4041 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4042 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4043 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4044 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4045 } 4046 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4047 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4048 } 4049 4050 /* Matrix of coarse basis functions (local) */ 4051 if (pcbddc->coarse_phi_B) { 4052 PetscInt on_B,on_primal,on_D=n_D; 4053 if (pcbddc->coarse_phi_D) { 4054 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4055 } 4056 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4057 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4058 PetscScalar *marray; 4059 4060 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4061 ierr = PetscFree(marray);CHKERRQ(ierr); 4062 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4063 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4064 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4065 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4066 } 4067 } 4068 4069 if (!pcbddc->coarse_phi_B) { 4070 PetscScalar *marr; 4071 4072 /* memory size */ 4073 n = n_B*pcbddc->local_primal_size; 4074 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4075 if (!pcbddc->symmetric_primal) n *= 2; 4076 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4077 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4078 marr += n_B*pcbddc->local_primal_size; 4079 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4080 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4081 marr += n_D*pcbddc->local_primal_size; 4082 } 4083 if (!pcbddc->symmetric_primal) { 4084 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4085 marr += n_B*pcbddc->local_primal_size; 4086 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4087 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4088 } 4089 } else { 4090 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4091 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4092 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4093 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4094 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4095 } 4096 } 4097 } 4098 4099 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4100 p0_lidx_I = NULL; 4101 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4102 const PetscInt *idxs; 4103 4104 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4105 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4106 for (i=0;i<pcbddc->benign_n;i++) { 4107 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4108 } 4109 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4110 } 4111 4112 /* vertices */ 4113 if (n_vertices) { 4114 PetscBool restoreavr = PETSC_FALSE; 4115 4116 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4117 4118 if (n_R) { 4119 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4120 PetscBLASInt B_N,B_one = 1; 4121 PetscScalar *x,*y; 4122 4123 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4124 if (need_benign_correction) { 4125 ISLocalToGlobalMapping RtoN; 4126 IS is_p0; 4127 PetscInt *idxs_p0,n; 4128 4129 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4130 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4131 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4132 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4133 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4134 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4135 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4136 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4137 } 4138 4139 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4140 if (!sparserhs || need_benign_correction) { 4141 if (lda_rhs == n_R) { 4142 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4143 } else { 4144 PetscScalar *av,*array; 4145 const PetscInt *xadj,*adjncy; 4146 PetscInt n; 4147 PetscBool flg_row; 4148 4149 array = work+lda_rhs*n_vertices; 4150 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4151 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4152 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4153 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4154 for (i=0;i<n;i++) { 4155 PetscInt j; 4156 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4157 } 4158 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4159 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4160 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4161 } 4162 if (need_benign_correction) { 4163 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4164 PetscScalar *marr; 4165 4166 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4167 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4168 4169 | 0 0 0 | (V) 4170 L = | 0 0 -1 | (P-p0) 4171 | 0 0 -1 | (p0) 4172 4173 */ 4174 for (i=0;i<reuse_solver->benign_n;i++) { 4175 const PetscScalar *vals; 4176 const PetscInt *idxs,*idxs_zero; 4177 PetscInt n,j,nz; 4178 4179 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4180 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4181 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4182 for (j=0;j<n;j++) { 4183 PetscScalar val = vals[j]; 4184 PetscInt k,col = idxs[j]; 4185 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4186 } 4187 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4188 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4189 } 4190 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4191 } 4192 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4193 Brhs = A_RV; 4194 } else { 4195 Mat tA_RVT,A_RVT; 4196 4197 if (!pcbddc->symmetric_primal) { 4198 /* A_RV already scaled by -1 */ 4199 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4200 } else { 4201 restoreavr = PETSC_TRUE; 4202 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4203 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4204 A_RVT = A_VR; 4205 } 4206 if (lda_rhs != n_R) { 4207 PetscScalar *aa; 4208 PetscInt r,*ii,*jj; 4209 PetscBool done; 4210 4211 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4212 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4213 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4214 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4215 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4216 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4217 } else { 4218 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4219 tA_RVT = A_RVT; 4220 } 4221 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4222 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4223 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4224 } 4225 if (F) { 4226 /* need to correct the rhs */ 4227 if (need_benign_correction) { 4228 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4229 PetscScalar *marr; 4230 4231 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4232 if (lda_rhs != n_R) { 4233 for (i=0;i<n_vertices;i++) { 4234 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4235 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4236 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4237 } 4238 } else { 4239 for (i=0;i<n_vertices;i++) { 4240 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4241 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4242 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4243 } 4244 } 4245 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4246 } 4247 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4248 if (restoreavr) { 4249 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4250 } 4251 /* need to correct the solution */ 4252 if (need_benign_correction) { 4253 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4254 PetscScalar *marr; 4255 4256 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4257 if (lda_rhs != n_R) { 4258 for (i=0;i<n_vertices;i++) { 4259 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4260 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4261 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4262 } 4263 } else { 4264 for (i=0;i<n_vertices;i++) { 4265 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4266 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4267 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4268 } 4269 } 4270 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4271 } 4272 } else { 4273 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4274 for (i=0;i<n_vertices;i++) { 4275 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4276 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4277 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4278 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4279 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4280 } 4281 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4282 } 4283 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4284 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4285 /* S_VV and S_CV */ 4286 if (n_constraints) { 4287 Mat B; 4288 4289 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4290 for (i=0;i<n_vertices;i++) { 4291 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4292 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4293 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4294 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4295 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4296 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4297 } 4298 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4299 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4300 ierr = MatDestroy(&B);CHKERRQ(ierr); 4301 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4302 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4303 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4304 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4305 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4306 ierr = MatDestroy(&B);CHKERRQ(ierr); 4307 } 4308 if (lda_rhs != n_R) { 4309 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4310 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4311 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4312 } 4313 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4314 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4315 if (need_benign_correction) { 4316 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4317 PetscScalar *marr,*sums; 4318 4319 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4320 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4321 for (i=0;i<reuse_solver->benign_n;i++) { 4322 const PetscScalar *vals; 4323 const PetscInt *idxs,*idxs_zero; 4324 PetscInt n,j,nz; 4325 4326 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4327 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4328 for (j=0;j<n_vertices;j++) { 4329 PetscInt k; 4330 sums[j] = 0.; 4331 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4332 } 4333 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4334 for (j=0;j<n;j++) { 4335 PetscScalar val = vals[j]; 4336 PetscInt k; 4337 for (k=0;k<n_vertices;k++) { 4338 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4339 } 4340 } 4341 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4342 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4343 } 4344 ierr = PetscFree(sums);CHKERRQ(ierr); 4345 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4346 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4347 } 4348 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4349 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4350 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4351 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4352 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4353 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4354 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4355 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4356 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4357 } else { 4358 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4359 } 4360 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4361 4362 /* coarse basis functions */ 4363 for (i=0;i<n_vertices;i++) { 4364 PetscScalar *y; 4365 4366 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4367 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4368 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4369 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4370 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4371 y[n_B*i+idx_V_B[i]] = 1.0; 4372 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4373 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4374 4375 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4376 PetscInt j; 4377 4378 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4379 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4380 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4381 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4382 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4383 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4384 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4385 } 4386 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4387 } 4388 /* if n_R == 0 the object is not destroyed */ 4389 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4390 } 4391 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4392 4393 if (n_constraints) { 4394 Mat B; 4395 4396 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4397 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4398 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4399 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4400 if (n_vertices) { 4401 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4402 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4403 } else { 4404 Mat S_VCt; 4405 4406 if (lda_rhs != n_R) { 4407 ierr = MatDestroy(&B);CHKERRQ(ierr); 4408 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4409 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4410 } 4411 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4412 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4413 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4414 } 4415 } 4416 ierr = MatDestroy(&B);CHKERRQ(ierr); 4417 /* coarse basis functions */ 4418 for (i=0;i<n_constraints;i++) { 4419 PetscScalar *y; 4420 4421 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4422 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4423 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4424 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4425 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4426 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4427 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4428 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4429 PetscInt j; 4430 4431 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4432 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4433 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4434 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4435 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4436 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4437 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4438 } 4439 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4440 } 4441 } 4442 if (n_constraints) { 4443 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4444 } 4445 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4446 4447 /* coarse matrix entries relative to B_0 */ 4448 if (pcbddc->benign_n) { 4449 Mat B0_B,B0_BPHI; 4450 IS is_dummy; 4451 PetscScalar *data; 4452 PetscInt j; 4453 4454 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4455 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4456 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4457 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4458 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4459 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4460 for (j=0;j<pcbddc->benign_n;j++) { 4461 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4462 for (i=0;i<pcbddc->local_primal_size;i++) { 4463 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4464 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4465 } 4466 } 4467 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4468 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4469 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4470 } 4471 4472 /* compute other basis functions for non-symmetric problems */ 4473 if (!pcbddc->symmetric_primal) { 4474 Mat B_V=NULL,B_C=NULL; 4475 PetscScalar *marray; 4476 4477 if (n_constraints) { 4478 Mat S_CCT,C_CRT; 4479 4480 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4481 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4482 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4483 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4484 if (n_vertices) { 4485 Mat S_VCT; 4486 4487 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4488 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4489 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4490 } 4491 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4492 } else { 4493 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4494 } 4495 if (n_vertices && n_R) { 4496 PetscScalar *av,*marray; 4497 const PetscInt *xadj,*adjncy; 4498 PetscInt n; 4499 PetscBool flg_row; 4500 4501 /* B_V = B_V - A_VR^T */ 4502 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4503 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4504 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4505 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4506 for (i=0;i<n;i++) { 4507 PetscInt j; 4508 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4509 } 4510 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4511 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4512 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4513 } 4514 4515 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4516 if (n_vertices) { 4517 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4518 for (i=0;i<n_vertices;i++) { 4519 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4520 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4521 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4522 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4523 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4524 } 4525 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4526 } 4527 if (B_C) { 4528 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4529 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4530 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4531 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4532 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4533 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4534 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4535 } 4536 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4537 } 4538 /* coarse basis functions */ 4539 for (i=0;i<pcbddc->local_primal_size;i++) { 4540 PetscScalar *y; 4541 4542 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4543 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4544 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4545 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4546 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 if (i<n_vertices) { 4548 y[n_B*i+idx_V_B[i]] = 1.0; 4549 } 4550 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4551 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4552 4553 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4554 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4555 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4556 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4557 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4558 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4559 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4560 } 4561 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4562 } 4563 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4564 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4565 } 4566 4567 /* free memory */ 4568 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4569 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4570 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4571 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4572 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4573 ierr = PetscFree(work);CHKERRQ(ierr); 4574 if (n_vertices) { 4575 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4576 } 4577 if (n_constraints) { 4578 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4579 } 4580 /* Checking coarse_sub_mat and coarse basis functios */ 4581 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4582 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4583 if (pcbddc->dbg_flag) { 4584 Mat coarse_sub_mat; 4585 Mat AUXMAT,TM1,TM2,TM3,TM4; 4586 Mat coarse_phi_D,coarse_phi_B; 4587 Mat coarse_psi_D,coarse_psi_B; 4588 Mat A_II,A_BB,A_IB,A_BI; 4589 Mat C_B,CPHI; 4590 IS is_dummy; 4591 Vec mones; 4592 MatType checkmattype=MATSEQAIJ; 4593 PetscReal real_value; 4594 4595 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4596 Mat A; 4597 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4598 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4599 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4600 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4601 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4602 ierr = MatDestroy(&A);CHKERRQ(ierr); 4603 } else { 4604 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4605 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4606 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4607 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4608 } 4609 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4610 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4611 if (!pcbddc->symmetric_primal) { 4612 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4613 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4614 } 4615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4616 4617 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4618 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4619 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4620 if (!pcbddc->symmetric_primal) { 4621 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4622 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4623 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4624 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4625 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4626 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4627 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4628 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4629 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4630 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4631 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4632 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4633 } else { 4634 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4635 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4636 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4637 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4638 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4639 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4640 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4641 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4642 } 4643 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4644 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4645 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4646 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4647 if (pcbddc->benign_n) { 4648 Mat B0_B,B0_BPHI; 4649 PetscScalar *data,*data2; 4650 PetscInt j; 4651 4652 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4653 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4654 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4655 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4656 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4657 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4658 for (j=0;j<pcbddc->benign_n;j++) { 4659 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4660 for (i=0;i<pcbddc->local_primal_size;i++) { 4661 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4662 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4663 } 4664 } 4665 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4666 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4667 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4668 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4669 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4670 } 4671 #if 0 4672 { 4673 PetscViewer viewer; 4674 char filename[256]; 4675 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4676 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4677 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4678 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4679 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4680 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4681 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4682 if (pcbddc->coarse_phi_B) { 4683 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4684 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4685 } 4686 if (pcbddc->coarse_phi_D) { 4687 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4688 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4689 } 4690 if (pcbddc->coarse_psi_B) { 4691 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4692 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4693 } 4694 if (pcbddc->coarse_psi_D) { 4695 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4696 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4697 } 4698 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4699 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4700 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4701 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4702 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4703 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4704 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4705 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4706 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4707 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4708 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4709 } 4710 #endif 4711 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4712 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4713 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4714 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4715 4716 /* check constraints */ 4717 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4718 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4719 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4720 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4721 } else { 4722 PetscScalar *data; 4723 Mat tmat; 4724 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4726 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4727 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4728 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4729 } 4730 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4731 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4732 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4733 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4735 if (!pcbddc->symmetric_primal) { 4736 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4737 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4738 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4739 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4740 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4741 } 4742 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4743 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4744 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4745 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4746 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4747 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4748 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4749 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4750 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4751 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4752 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4753 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4754 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4755 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4756 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4757 if (!pcbddc->symmetric_primal) { 4758 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4759 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4760 } 4761 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4762 } 4763 /* get back data */ 4764 *coarse_submat_vals_n = coarse_submat_vals; 4765 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4766 PetscFunctionReturn(0); 4767 } 4768 4769 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4770 { 4771 Mat *work_mat; 4772 IS isrow_s,iscol_s; 4773 PetscBool rsorted,csorted; 4774 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4775 PetscErrorCode ierr; 4776 4777 PetscFunctionBegin; 4778 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4779 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4780 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4781 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4782 4783 if (!rsorted) { 4784 const PetscInt *idxs; 4785 PetscInt *idxs_sorted,i; 4786 4787 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4788 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4789 for (i=0;i<rsize;i++) { 4790 idxs_perm_r[i] = i; 4791 } 4792 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4793 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4794 for (i=0;i<rsize;i++) { 4795 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4796 } 4797 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4798 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4799 } else { 4800 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4801 isrow_s = isrow; 4802 } 4803 4804 if (!csorted) { 4805 if (isrow == iscol) { 4806 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4807 iscol_s = isrow_s; 4808 } else { 4809 const PetscInt *idxs; 4810 PetscInt *idxs_sorted,i; 4811 4812 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4813 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4814 for (i=0;i<csize;i++) { 4815 idxs_perm_c[i] = i; 4816 } 4817 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4818 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4819 for (i=0;i<csize;i++) { 4820 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4821 } 4822 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4823 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4824 } 4825 } else { 4826 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4827 iscol_s = iscol; 4828 } 4829 4830 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4831 4832 if (!rsorted || !csorted) { 4833 Mat new_mat; 4834 IS is_perm_r,is_perm_c; 4835 4836 if (!rsorted) { 4837 PetscInt *idxs_r,i; 4838 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4839 for (i=0;i<rsize;i++) { 4840 idxs_r[idxs_perm_r[i]] = i; 4841 } 4842 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4843 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4844 } else { 4845 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4846 } 4847 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4848 4849 if (!csorted) { 4850 if (isrow_s == iscol_s) { 4851 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4852 is_perm_c = is_perm_r; 4853 } else { 4854 PetscInt *idxs_c,i; 4855 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4856 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4857 for (i=0;i<csize;i++) { 4858 idxs_c[idxs_perm_c[i]] = i; 4859 } 4860 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4861 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4862 } 4863 } else { 4864 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4865 } 4866 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4867 4868 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4869 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4870 work_mat[0] = new_mat; 4871 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4872 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4873 } 4874 4875 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4876 *B = work_mat[0]; 4877 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4878 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4879 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4880 PetscFunctionReturn(0); 4881 } 4882 4883 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4884 { 4885 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4886 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4887 Mat new_mat,lA; 4888 IS is_local,is_global; 4889 PetscInt local_size; 4890 PetscBool isseqaij; 4891 PetscErrorCode ierr; 4892 4893 PetscFunctionBegin; 4894 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4895 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4896 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4897 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4898 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4899 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4900 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4901 4902 /* check */ 4903 if (pcbddc->dbg_flag) { 4904 Vec x,x_change; 4905 PetscReal error; 4906 4907 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4908 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4909 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4910 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4911 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4912 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4913 if (!pcbddc->change_interior) { 4914 const PetscScalar *x,*y,*v; 4915 PetscReal lerror = 0.; 4916 PetscInt i; 4917 4918 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4919 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4920 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4921 for (i=0;i<local_size;i++) 4922 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4923 lerror = PetscAbsScalar(x[i]-y[i]); 4924 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4925 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4926 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4927 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4928 if (error > PETSC_SMALL) { 4929 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4930 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 4931 } else { 4932 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 4933 } 4934 } 4935 } 4936 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4937 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4938 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4939 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4940 if (error > PETSC_SMALL) { 4941 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4942 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 4943 } else { 4944 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 4945 } 4946 } 4947 ierr = VecDestroy(&x);CHKERRQ(ierr); 4948 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4949 } 4950 4951 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4952 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4953 4954 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4955 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4956 if (isseqaij) { 4957 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4958 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4959 if (lA) { 4960 Mat work; 4961 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4962 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4963 ierr = MatDestroy(&work);CHKERRQ(ierr); 4964 } 4965 } else { 4966 Mat work_mat; 4967 4968 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4969 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4970 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4971 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4972 if (lA) { 4973 Mat work; 4974 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4975 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4976 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4977 ierr = MatDestroy(&work);CHKERRQ(ierr); 4978 } 4979 } 4980 if (matis->A->symmetric_set) { 4981 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4982 #if !defined(PETSC_USE_COMPLEX) 4983 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4984 #endif 4985 } 4986 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4987 PetscFunctionReturn(0); 4988 } 4989 4990 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4991 { 4992 PC_IS* pcis = (PC_IS*)(pc->data); 4993 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4994 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4995 PetscInt *idx_R_local=NULL; 4996 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4997 PetscInt vbs,bs; 4998 PetscBT bitmask=NULL; 4999 PetscErrorCode ierr; 5000 5001 PetscFunctionBegin; 5002 /* 5003 No need to setup local scatters if 5004 - primal space is unchanged 5005 AND 5006 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5007 AND 5008 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5009 */ 5010 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5011 PetscFunctionReturn(0); 5012 } 5013 /* destroy old objects */ 5014 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5015 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5016 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5017 /* Set Non-overlapping dimensions */ 5018 n_B = pcis->n_B; 5019 n_D = pcis->n - n_B; 5020 n_vertices = pcbddc->n_vertices; 5021 5022 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5023 5024 /* create auxiliary bitmask and allocate workspace */ 5025 if (!sub_schurs || !sub_schurs->reuse_solver) { 5026 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5027 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5028 for (i=0;i<n_vertices;i++) { 5029 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5030 } 5031 5032 for (i=0, n_R=0; i<pcis->n; i++) { 5033 if (!PetscBTLookup(bitmask,i)) { 5034 idx_R_local[n_R++] = i; 5035 } 5036 } 5037 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5038 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5039 5040 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5041 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5042 } 5043 5044 /* Block code */ 5045 vbs = 1; 5046 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5047 if (bs>1 && !(n_vertices%bs)) { 5048 PetscBool is_blocked = PETSC_TRUE; 5049 PetscInt *vary; 5050 if (!sub_schurs || !sub_schurs->reuse_solver) { 5051 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5052 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5053 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5054 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5055 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5056 for (i=0; i<pcis->n/bs; i++) { 5057 if (vary[i]!=0 && vary[i]!=bs) { 5058 is_blocked = PETSC_FALSE; 5059 break; 5060 } 5061 } 5062 ierr = PetscFree(vary);CHKERRQ(ierr); 5063 } else { 5064 /* Verify directly the R set */ 5065 for (i=0; i<n_R/bs; i++) { 5066 PetscInt j,node=idx_R_local[bs*i]; 5067 for (j=1; j<bs; j++) { 5068 if (node != idx_R_local[bs*i+j]-j) { 5069 is_blocked = PETSC_FALSE; 5070 break; 5071 } 5072 } 5073 } 5074 } 5075 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5076 vbs = bs; 5077 for (i=0;i<n_R/vbs;i++) { 5078 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5079 } 5080 } 5081 } 5082 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5083 if (sub_schurs && sub_schurs->reuse_solver) { 5084 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5085 5086 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5087 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5088 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5089 reuse_solver->is_R = pcbddc->is_R_local; 5090 } else { 5091 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5092 } 5093 5094 /* print some info if requested */ 5095 if (pcbddc->dbg_flag) { 5096 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5097 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5098 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5099 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5100 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5101 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5102 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5103 } 5104 5105 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5106 if (!sub_schurs || !sub_schurs->reuse_solver) { 5107 IS is_aux1,is_aux2; 5108 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5109 5110 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5111 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5112 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5113 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5114 for (i=0; i<n_D; i++) { 5115 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5116 } 5117 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5118 for (i=0, j=0; i<n_R; i++) { 5119 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5120 aux_array1[j++] = i; 5121 } 5122 } 5123 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5124 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5125 for (i=0, j=0; i<n_B; i++) { 5126 if (!PetscBTLookup(bitmask,is_indices[i])) { 5127 aux_array2[j++] = i; 5128 } 5129 } 5130 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5131 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5132 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5133 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5134 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5135 5136 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5137 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5138 for (i=0, j=0; i<n_R; i++) { 5139 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5140 aux_array1[j++] = i; 5141 } 5142 } 5143 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5144 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5145 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5146 } 5147 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5148 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5149 } else { 5150 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5151 IS tis; 5152 PetscInt schur_size; 5153 5154 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5155 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5156 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5157 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5158 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5159 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5160 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5161 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5162 } 5163 } 5164 PetscFunctionReturn(0); 5165 } 5166 5167 5168 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5169 { 5170 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5171 PC_IS *pcis = (PC_IS*)pc->data; 5172 PC pc_temp; 5173 Mat A_RR; 5174 MatReuse reuse; 5175 PetscScalar m_one = -1.0; 5176 PetscReal value; 5177 PetscInt n_D,n_R; 5178 PetscBool check_corr,issbaij; 5179 PetscErrorCode ierr; 5180 /* prefixes stuff */ 5181 char dir_prefix[256],neu_prefix[256],str_level[16]; 5182 size_t len; 5183 5184 PetscFunctionBegin; 5185 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5186 /* compute prefixes */ 5187 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5188 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5189 if (!pcbddc->current_level) { 5190 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5191 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5192 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5193 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5194 } else { 5195 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5196 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5197 len -= 15; /* remove "pc_bddc_coarse_" */ 5198 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5199 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5200 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5201 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5202 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5203 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5204 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5205 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5206 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5207 } 5208 5209 /* DIRICHLET PROBLEM */ 5210 if (dirichlet) { 5211 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5212 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5213 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5214 if (pcbddc->dbg_flag) { 5215 Mat A_IIn; 5216 5217 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5218 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5219 pcis->A_II = A_IIn; 5220 } 5221 } 5222 if (pcbddc->local_mat->symmetric_set) { 5223 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5224 } 5225 /* Matrix for Dirichlet problem is pcis->A_II */ 5226 n_D = pcis->n - pcis->n_B; 5227 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5228 void (*f)(void) = 0; 5229 5230 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5231 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5232 /* default */ 5233 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5234 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5235 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5236 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5237 if (issbaij) { 5238 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5239 } else { 5240 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5241 } 5242 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5243 /* Allow user's customization */ 5244 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5245 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5246 if (f && pcbddc->mat_graph->cloc) { 5247 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5248 const PetscInt *idxs; 5249 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5250 5251 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5252 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5253 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5254 for (i=0;i<nl;i++) { 5255 for (d=0;d<cdim;d++) { 5256 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5257 } 5258 } 5259 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5260 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5261 ierr = PetscFree(scoords);CHKERRQ(ierr); 5262 } 5263 } 5264 ierr = MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5265 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5266 if (sub_schurs && sub_schurs->reuse_solver) { 5267 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5268 5269 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5270 } 5271 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5272 if (!n_D) { 5273 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5274 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5275 } 5276 /* set ksp_D into pcis data */ 5277 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5278 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5279 pcis->ksp_D = pcbddc->ksp_D; 5280 } 5281 5282 /* NEUMANN PROBLEM */ 5283 A_RR = 0; 5284 if (neumann) { 5285 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5286 PetscInt ibs,mbs; 5287 PetscBool issbaij, reuse_neumann_solver; 5288 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5289 5290 reuse_neumann_solver = PETSC_FALSE; 5291 if (sub_schurs && sub_schurs->reuse_solver) { 5292 IS iP; 5293 5294 reuse_neumann_solver = PETSC_TRUE; 5295 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5296 if (iP) reuse_neumann_solver = PETSC_FALSE; 5297 } 5298 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5299 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5300 if (pcbddc->ksp_R) { /* already created ksp */ 5301 PetscInt nn_R; 5302 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5303 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5304 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5305 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5306 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5307 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5308 reuse = MAT_INITIAL_MATRIX; 5309 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5310 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5311 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5312 reuse = MAT_INITIAL_MATRIX; 5313 } else { /* safe to reuse the matrix */ 5314 reuse = MAT_REUSE_MATRIX; 5315 } 5316 } 5317 /* last check */ 5318 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5319 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5320 reuse = MAT_INITIAL_MATRIX; 5321 } 5322 } else { /* first time, so we need to create the matrix */ 5323 reuse = MAT_INITIAL_MATRIX; 5324 } 5325 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5326 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5327 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5328 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5329 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5330 if (matis->A == pcbddc->local_mat) { 5331 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5332 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5333 } else { 5334 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5335 } 5336 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5337 if (matis->A == pcbddc->local_mat) { 5338 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5339 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5340 } else { 5341 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5342 } 5343 } 5344 /* extract A_RR */ 5345 if (reuse_neumann_solver) { 5346 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5347 5348 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5349 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5350 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5351 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5352 } else { 5353 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5354 } 5355 } else { 5356 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5357 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5358 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5359 } 5360 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5361 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5362 } 5363 if (pcbddc->local_mat->symmetric_set) { 5364 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5365 } 5366 if (!pcbddc->ksp_R) { /* create object if not present */ 5367 void (*f)(void) = 0; 5368 5369 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5370 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5371 /* default */ 5372 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5373 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5374 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5375 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5376 if (issbaij) { 5377 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5378 } else { 5379 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5380 } 5381 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5382 /* Allow user's customization */ 5383 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5384 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5385 if (f && pcbddc->mat_graph->cloc) { 5386 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5387 const PetscInt *idxs; 5388 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5389 5390 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5391 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5392 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5393 for (i=0;i<nl;i++) { 5394 for (d=0;d<cdim;d++) { 5395 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5396 } 5397 } 5398 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5399 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5400 ierr = PetscFree(scoords);CHKERRQ(ierr); 5401 } 5402 } 5403 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5404 if (!n_R) { 5405 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5406 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5407 } 5408 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5409 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5410 /* Reuse solver if it is present */ 5411 if (reuse_neumann_solver) { 5412 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5413 5414 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5415 } 5416 } 5417 5418 if (pcbddc->dbg_flag) { 5419 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5420 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5421 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5422 } 5423 5424 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5425 check_corr = PETSC_FALSE; 5426 if (pcbddc->NullSpace_corr[0]) { 5427 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5428 } 5429 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5430 check_corr = PETSC_TRUE; 5431 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5432 } 5433 if (neumann && pcbddc->NullSpace_corr[2]) { 5434 check_corr = PETSC_TRUE; 5435 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5436 } 5437 /* check Dirichlet and Neumann solvers */ 5438 if (pcbddc->dbg_flag) { 5439 if (dirichlet) { /* Dirichlet */ 5440 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5441 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5442 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5443 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5444 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5445 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5446 if (check_corr) { 5447 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5448 } 5449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5450 } 5451 if (neumann) { /* Neumann */ 5452 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5453 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5454 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5455 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5456 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5457 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5458 if (check_corr) { 5459 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5460 } 5461 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5462 } 5463 } 5464 /* free Neumann problem's matrix */ 5465 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5466 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5467 PetscFunctionReturn(0); 5468 } 5469 5470 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5471 { 5472 PetscErrorCode ierr; 5473 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5474 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5475 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5476 5477 PetscFunctionBegin; 5478 if (!reuse_solver) { 5479 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5480 } 5481 if (!pcbddc->switch_static) { 5482 if (applytranspose && pcbddc->local_auxmat1) { 5483 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5484 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5485 } 5486 if (!reuse_solver) { 5487 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5488 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5489 } else { 5490 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5491 5492 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5493 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5494 } 5495 } else { 5496 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5497 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5498 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5499 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5500 if (applytranspose && pcbddc->local_auxmat1) { 5501 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5502 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5503 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5504 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5505 } 5506 } 5507 if (!reuse_solver || pcbddc->switch_static) { 5508 if (applytranspose) { 5509 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5510 } else { 5511 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5512 } 5513 } else { 5514 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5515 5516 if (applytranspose) { 5517 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5518 } else { 5519 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5520 } 5521 } 5522 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5523 if (!pcbddc->switch_static) { 5524 if (!reuse_solver) { 5525 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5526 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5527 } else { 5528 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5529 5530 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5531 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5532 } 5533 if (!applytranspose && pcbddc->local_auxmat1) { 5534 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5535 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5536 } 5537 } else { 5538 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5539 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5540 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5541 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5542 if (!applytranspose && pcbddc->local_auxmat1) { 5543 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5544 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5545 } 5546 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5547 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5548 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5549 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5550 } 5551 PetscFunctionReturn(0); 5552 } 5553 5554 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5555 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5556 { 5557 PetscErrorCode ierr; 5558 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5559 PC_IS* pcis = (PC_IS*) (pc->data); 5560 const PetscScalar zero = 0.0; 5561 5562 PetscFunctionBegin; 5563 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5564 if (!pcbddc->benign_apply_coarse_only) { 5565 if (applytranspose) { 5566 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5567 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5568 } else { 5569 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5570 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5571 } 5572 } else { 5573 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5574 } 5575 5576 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5577 if (pcbddc->benign_n) { 5578 PetscScalar *array; 5579 PetscInt j; 5580 5581 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5582 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5583 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5584 } 5585 5586 /* start communications from local primal nodes to rhs of coarse solver */ 5587 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5588 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5589 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5590 5591 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5592 if (pcbddc->coarse_ksp) { 5593 Mat coarse_mat; 5594 Vec rhs,sol; 5595 MatNullSpace nullsp; 5596 PetscBool isbddc = PETSC_FALSE; 5597 5598 if (pcbddc->benign_have_null) { 5599 PC coarse_pc; 5600 5601 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5602 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5603 /* we need to propagate to coarser levels the need for a possible benign correction */ 5604 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5605 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5606 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5607 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5608 } 5609 } 5610 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5611 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5612 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5613 if (applytranspose) { 5614 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5615 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5616 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5617 if (nullsp) { 5618 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5619 } 5620 } else { 5621 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5622 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5623 PC coarse_pc; 5624 5625 if (nullsp) { 5626 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5627 } 5628 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5629 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5630 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5631 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5632 } else { 5633 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5634 if (nullsp) { 5635 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5636 } 5637 } 5638 } 5639 /* we don't need the benign correction at coarser levels anymore */ 5640 if (pcbddc->benign_have_null && isbddc) { 5641 PC coarse_pc; 5642 PC_BDDC* coarsepcbddc; 5643 5644 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5645 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5646 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5647 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5648 } 5649 } 5650 5651 /* Local solution on R nodes */ 5652 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5653 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5654 } 5655 /* communications from coarse sol to local primal nodes */ 5656 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5657 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5658 5659 /* Sum contributions from the two levels */ 5660 if (!pcbddc->benign_apply_coarse_only) { 5661 if (applytranspose) { 5662 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5663 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5664 } else { 5665 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5666 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5667 } 5668 /* store p0 */ 5669 if (pcbddc->benign_n) { 5670 PetscScalar *array; 5671 PetscInt j; 5672 5673 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5674 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5675 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5676 } 5677 } else { /* expand the coarse solution */ 5678 if (applytranspose) { 5679 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5680 } else { 5681 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5682 } 5683 } 5684 PetscFunctionReturn(0); 5685 } 5686 5687 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5688 { 5689 PetscErrorCode ierr; 5690 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5691 PetscScalar *array; 5692 Vec from,to; 5693 5694 PetscFunctionBegin; 5695 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5696 from = pcbddc->coarse_vec; 5697 to = pcbddc->vec1_P; 5698 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5699 Vec tvec; 5700 5701 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5702 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5703 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5704 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5705 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5706 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5707 } 5708 } else { /* from local to global -> put data in coarse right hand side */ 5709 from = pcbddc->vec1_P; 5710 to = pcbddc->coarse_vec; 5711 } 5712 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5713 PetscFunctionReturn(0); 5714 } 5715 5716 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5717 { 5718 PetscErrorCode ierr; 5719 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5720 PetscScalar *array; 5721 Vec from,to; 5722 5723 PetscFunctionBegin; 5724 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5725 from = pcbddc->coarse_vec; 5726 to = pcbddc->vec1_P; 5727 } else { /* from local to global -> put data in coarse right hand side */ 5728 from = pcbddc->vec1_P; 5729 to = pcbddc->coarse_vec; 5730 } 5731 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5732 if (smode == SCATTER_FORWARD) { 5733 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5734 Vec tvec; 5735 5736 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5737 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5738 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5739 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5740 } 5741 } else { 5742 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5743 ierr = VecResetArray(from);CHKERRQ(ierr); 5744 } 5745 } 5746 PetscFunctionReturn(0); 5747 } 5748 5749 /* uncomment for testing purposes */ 5750 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5751 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5752 { 5753 PetscErrorCode ierr; 5754 PC_IS* pcis = (PC_IS*)(pc->data); 5755 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5756 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5757 /* one and zero */ 5758 PetscScalar one=1.0,zero=0.0; 5759 /* space to store constraints and their local indices */ 5760 PetscScalar *constraints_data; 5761 PetscInt *constraints_idxs,*constraints_idxs_B; 5762 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5763 PetscInt *constraints_n; 5764 /* iterators */ 5765 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5766 /* BLAS integers */ 5767 PetscBLASInt lwork,lierr; 5768 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5769 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5770 /* reuse */ 5771 PetscInt olocal_primal_size,olocal_primal_size_cc; 5772 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5773 /* change of basis */ 5774 PetscBool qr_needed; 5775 PetscBT change_basis,qr_needed_idx; 5776 /* auxiliary stuff */ 5777 PetscInt *nnz,*is_indices; 5778 PetscInt ncc; 5779 /* some quantities */ 5780 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5781 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5782 PetscReal tol; /* tolerance for retaining eigenmodes */ 5783 5784 PetscFunctionBegin; 5785 tol = PetscSqrtReal(PETSC_SMALL); 5786 /* Destroy Mat objects computed previously */ 5787 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5788 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5789 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5790 /* save info on constraints from previous setup (if any) */ 5791 olocal_primal_size = pcbddc->local_primal_size; 5792 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5793 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5794 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5795 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5796 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5797 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5798 5799 if (!pcbddc->adaptive_selection) { 5800 IS ISForVertices,*ISForFaces,*ISForEdges; 5801 MatNullSpace nearnullsp; 5802 const Vec *nearnullvecs; 5803 Vec *localnearnullsp; 5804 PetscScalar *array; 5805 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5806 PetscBool nnsp_has_cnst; 5807 /* LAPACK working arrays for SVD or POD */ 5808 PetscBool skip_lapack,boolforchange; 5809 PetscScalar *work; 5810 PetscReal *singular_vals; 5811 #if defined(PETSC_USE_COMPLEX) 5812 PetscReal *rwork; 5813 #endif 5814 #if defined(PETSC_MISSING_LAPACK_GESVD) 5815 PetscScalar *temp_basis,*correlation_mat; 5816 #else 5817 PetscBLASInt dummy_int=1; 5818 PetscScalar dummy_scalar=1.; 5819 #endif 5820 5821 /* Get index sets for faces, edges and vertices from graph */ 5822 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5823 /* print some info */ 5824 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5825 PetscInt nv; 5826 5827 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5828 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5829 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5830 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5834 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5835 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5836 } 5837 5838 /* free unneeded index sets */ 5839 if (!pcbddc->use_vertices) { 5840 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5841 } 5842 if (!pcbddc->use_edges) { 5843 for (i=0;i<n_ISForEdges;i++) { 5844 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5845 } 5846 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5847 n_ISForEdges = 0; 5848 } 5849 if (!pcbddc->use_faces) { 5850 for (i=0;i<n_ISForFaces;i++) { 5851 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5852 } 5853 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5854 n_ISForFaces = 0; 5855 } 5856 5857 /* check if near null space is attached to global mat */ 5858 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5859 if (nearnullsp) { 5860 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5861 /* remove any stored info */ 5862 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5863 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5864 /* store information for BDDC solver reuse */ 5865 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5866 pcbddc->onearnullspace = nearnullsp; 5867 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5868 for (i=0;i<nnsp_size;i++) { 5869 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5870 } 5871 } else { /* if near null space is not provided BDDC uses constants by default */ 5872 nnsp_size = 0; 5873 nnsp_has_cnst = PETSC_TRUE; 5874 } 5875 /* get max number of constraints on a single cc */ 5876 max_constraints = nnsp_size; 5877 if (nnsp_has_cnst) max_constraints++; 5878 5879 /* 5880 Evaluate maximum storage size needed by the procedure 5881 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5882 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5883 There can be multiple constraints per connected component 5884 */ 5885 n_vertices = 0; 5886 if (ISForVertices) { 5887 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5888 } 5889 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5890 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5891 5892 total_counts = n_ISForFaces+n_ISForEdges; 5893 total_counts *= max_constraints; 5894 total_counts += n_vertices; 5895 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5896 5897 total_counts = 0; 5898 max_size_of_constraint = 0; 5899 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5900 IS used_is; 5901 if (i<n_ISForEdges) { 5902 used_is = ISForEdges[i]; 5903 } else { 5904 used_is = ISForFaces[i-n_ISForEdges]; 5905 } 5906 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5907 total_counts += j; 5908 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5909 } 5910 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5911 5912 /* get local part of global near null space vectors */ 5913 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5914 for (k=0;k<nnsp_size;k++) { 5915 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5916 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5917 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5918 } 5919 5920 /* whether or not to skip lapack calls */ 5921 skip_lapack = PETSC_TRUE; 5922 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5923 5924 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5925 if (!skip_lapack) { 5926 PetscScalar temp_work; 5927 5928 #if defined(PETSC_MISSING_LAPACK_GESVD) 5929 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5930 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5931 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5932 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5933 #if defined(PETSC_USE_COMPLEX) 5934 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5935 #endif 5936 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5937 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5938 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5939 lwork = -1; 5940 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5941 #if !defined(PETSC_USE_COMPLEX) 5942 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5943 #else 5944 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5945 #endif 5946 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5947 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5948 #else /* on missing GESVD */ 5949 /* SVD */ 5950 PetscInt max_n,min_n; 5951 max_n = max_size_of_constraint; 5952 min_n = max_constraints; 5953 if (max_size_of_constraint < max_constraints) { 5954 min_n = max_size_of_constraint; 5955 max_n = max_constraints; 5956 } 5957 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5958 #if defined(PETSC_USE_COMPLEX) 5959 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5960 #endif 5961 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5962 lwork = -1; 5963 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5964 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5965 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5966 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5967 #if !defined(PETSC_USE_COMPLEX) 5968 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5969 #else 5970 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5971 #endif 5972 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5973 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5974 #endif /* on missing GESVD */ 5975 /* Allocate optimal workspace */ 5976 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5977 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5978 } 5979 /* Now we can loop on constraining sets */ 5980 total_counts = 0; 5981 constraints_idxs_ptr[0] = 0; 5982 constraints_data_ptr[0] = 0; 5983 /* vertices */ 5984 if (n_vertices) { 5985 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5986 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5987 for (i=0;i<n_vertices;i++) { 5988 constraints_n[total_counts] = 1; 5989 constraints_data[total_counts] = 1.0; 5990 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5991 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5992 total_counts++; 5993 } 5994 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5995 n_vertices = total_counts; 5996 } 5997 5998 /* edges and faces */ 5999 total_counts_cc = total_counts; 6000 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6001 IS used_is; 6002 PetscBool idxs_copied = PETSC_FALSE; 6003 6004 if (ncc<n_ISForEdges) { 6005 used_is = ISForEdges[ncc]; 6006 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6007 } else { 6008 used_is = ISForFaces[ncc-n_ISForEdges]; 6009 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6010 } 6011 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6012 6013 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6014 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6015 /* change of basis should not be performed on local periodic nodes */ 6016 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6017 if (nnsp_has_cnst) { 6018 PetscScalar quad_value; 6019 6020 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6021 idxs_copied = PETSC_TRUE; 6022 6023 if (!pcbddc->use_nnsp_true) { 6024 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6025 } else { 6026 quad_value = 1.0; 6027 } 6028 for (j=0;j<size_of_constraint;j++) { 6029 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6030 } 6031 temp_constraints++; 6032 total_counts++; 6033 } 6034 for (k=0;k<nnsp_size;k++) { 6035 PetscReal real_value; 6036 PetscScalar *ptr_to_data; 6037 6038 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6039 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6040 for (j=0;j<size_of_constraint;j++) { 6041 ptr_to_data[j] = array[is_indices[j]]; 6042 } 6043 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6044 /* check if array is null on the connected component */ 6045 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6046 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6047 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6048 temp_constraints++; 6049 total_counts++; 6050 if (!idxs_copied) { 6051 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6052 idxs_copied = PETSC_TRUE; 6053 } 6054 } 6055 } 6056 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6057 valid_constraints = temp_constraints; 6058 if (!pcbddc->use_nnsp_true && temp_constraints) { 6059 if (temp_constraints == 1) { /* just normalize the constraint */ 6060 PetscScalar norm,*ptr_to_data; 6061 6062 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6064 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6065 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6066 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6067 } else { /* perform SVD */ 6068 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6069 6070 #if defined(PETSC_MISSING_LAPACK_GESVD) 6071 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6072 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6073 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6074 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6075 from that computed using LAPACKgesvd 6076 -> This is due to a different computation of eigenvectors in LAPACKheev 6077 -> The quality of the POD-computed basis will be the same */ 6078 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6079 /* Store upper triangular part of correlation matrix */ 6080 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6081 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6082 for (j=0;j<temp_constraints;j++) { 6083 for (k=0;k<j+1;k++) { 6084 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6085 } 6086 } 6087 /* compute eigenvalues and eigenvectors of correlation matrix */ 6088 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6089 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6090 #if !defined(PETSC_USE_COMPLEX) 6091 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6092 #else 6093 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6094 #endif 6095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6096 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6097 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6098 j = 0; 6099 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6100 total_counts = total_counts-j; 6101 valid_constraints = temp_constraints-j; 6102 /* scale and copy POD basis into used quadrature memory */ 6103 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6104 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6105 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6106 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6107 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6109 if (j<temp_constraints) { 6110 PetscInt ii; 6111 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6112 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6113 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6114 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6115 for (k=0;k<temp_constraints-j;k++) { 6116 for (ii=0;ii<size_of_constraint;ii++) { 6117 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6118 } 6119 } 6120 } 6121 #else /* on missing GESVD */ 6122 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6123 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6124 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6125 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6126 #if !defined(PETSC_USE_COMPLEX) 6127 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6128 #else 6129 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6130 #endif 6131 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6132 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6133 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6134 k = temp_constraints; 6135 if (k > size_of_constraint) k = size_of_constraint; 6136 j = 0; 6137 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6138 valid_constraints = k-j; 6139 total_counts = total_counts-temp_constraints+valid_constraints; 6140 #endif /* on missing GESVD */ 6141 } 6142 } 6143 /* update pointers information */ 6144 if (valid_constraints) { 6145 constraints_n[total_counts_cc] = valid_constraints; 6146 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6147 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6148 /* set change_of_basis flag */ 6149 if (boolforchange) { 6150 PetscBTSet(change_basis,total_counts_cc); 6151 } 6152 total_counts_cc++; 6153 } 6154 } 6155 /* free workspace */ 6156 if (!skip_lapack) { 6157 ierr = PetscFree(work);CHKERRQ(ierr); 6158 #if defined(PETSC_USE_COMPLEX) 6159 ierr = PetscFree(rwork);CHKERRQ(ierr); 6160 #endif 6161 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6162 #if defined(PETSC_MISSING_LAPACK_GESVD) 6163 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6164 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6165 #endif 6166 } 6167 for (k=0;k<nnsp_size;k++) { 6168 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6169 } 6170 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6171 /* free index sets of faces, edges and vertices */ 6172 for (i=0;i<n_ISForFaces;i++) { 6173 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6174 } 6175 if (n_ISForFaces) { 6176 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6177 } 6178 for (i=0;i<n_ISForEdges;i++) { 6179 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6180 } 6181 if (n_ISForEdges) { 6182 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6183 } 6184 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6185 } else { 6186 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6187 6188 total_counts = 0; 6189 n_vertices = 0; 6190 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6191 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6192 } 6193 max_constraints = 0; 6194 total_counts_cc = 0; 6195 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6196 total_counts += pcbddc->adaptive_constraints_n[i]; 6197 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6198 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6199 } 6200 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6201 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6202 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6203 constraints_data = pcbddc->adaptive_constraints_data; 6204 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6205 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6206 total_counts_cc = 0; 6207 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6208 if (pcbddc->adaptive_constraints_n[i]) { 6209 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6210 } 6211 } 6212 6213 max_size_of_constraint = 0; 6214 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6215 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6216 /* Change of basis */ 6217 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6218 if (pcbddc->use_change_of_basis) { 6219 for (i=0;i<sub_schurs->n_subs;i++) { 6220 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6221 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6222 } 6223 } 6224 } 6225 } 6226 pcbddc->local_primal_size = total_counts; 6227 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6228 6229 /* map constraints_idxs in boundary numbering */ 6230 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6231 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6232 6233 /* Create constraint matrix */ 6234 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6235 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6236 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6237 6238 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6239 /* determine if a QR strategy is needed for change of basis */ 6240 qr_needed = pcbddc->use_qr_single; 6241 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6242 total_primal_vertices=0; 6243 pcbddc->local_primal_size_cc = 0; 6244 for (i=0;i<total_counts_cc;i++) { 6245 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6246 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6247 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6248 pcbddc->local_primal_size_cc += 1; 6249 } else if (PetscBTLookup(change_basis,i)) { 6250 for (k=0;k<constraints_n[i];k++) { 6251 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6252 } 6253 pcbddc->local_primal_size_cc += constraints_n[i]; 6254 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6255 PetscBTSet(qr_needed_idx,i); 6256 qr_needed = PETSC_TRUE; 6257 } 6258 } else { 6259 pcbddc->local_primal_size_cc += 1; 6260 } 6261 } 6262 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6263 pcbddc->n_vertices = total_primal_vertices; 6264 /* permute indices in order to have a sorted set of vertices */ 6265 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6266 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6267 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6268 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6269 6270 /* nonzero structure of constraint matrix */ 6271 /* and get reference dof for local constraints */ 6272 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6273 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6274 6275 j = total_primal_vertices; 6276 total_counts = total_primal_vertices; 6277 cum = total_primal_vertices; 6278 for (i=n_vertices;i<total_counts_cc;i++) { 6279 if (!PetscBTLookup(change_basis,i)) { 6280 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6281 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6282 cum++; 6283 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6284 for (k=0;k<constraints_n[i];k++) { 6285 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6286 nnz[j+k] = size_of_constraint; 6287 } 6288 j += constraints_n[i]; 6289 } 6290 } 6291 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6292 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6293 ierr = PetscFree(nnz);CHKERRQ(ierr); 6294 6295 /* set values in constraint matrix */ 6296 for (i=0;i<total_primal_vertices;i++) { 6297 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6298 } 6299 total_counts = total_primal_vertices; 6300 for (i=n_vertices;i<total_counts_cc;i++) { 6301 if (!PetscBTLookup(change_basis,i)) { 6302 PetscInt *cols; 6303 6304 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6305 cols = constraints_idxs+constraints_idxs_ptr[i]; 6306 for (k=0;k<constraints_n[i];k++) { 6307 PetscInt row = total_counts+k; 6308 PetscScalar *vals; 6309 6310 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6311 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6312 } 6313 total_counts += constraints_n[i]; 6314 } 6315 } 6316 /* assembling */ 6317 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6318 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6319 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6320 6321 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6322 if (pcbddc->use_change_of_basis) { 6323 /* dual and primal dofs on a single cc */ 6324 PetscInt dual_dofs,primal_dofs; 6325 /* working stuff for GEQRF */ 6326 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6327 PetscBLASInt lqr_work; 6328 /* working stuff for UNGQR */ 6329 PetscScalar *gqr_work = NULL,lgqr_work_t; 6330 PetscBLASInt lgqr_work; 6331 /* working stuff for TRTRS */ 6332 PetscScalar *trs_rhs = NULL; 6333 PetscBLASInt Blas_NRHS; 6334 /* pointers for values insertion into change of basis matrix */ 6335 PetscInt *start_rows,*start_cols; 6336 PetscScalar *start_vals; 6337 /* working stuff for values insertion */ 6338 PetscBT is_primal; 6339 PetscInt *aux_primal_numbering_B; 6340 /* matrix sizes */ 6341 PetscInt global_size,local_size; 6342 /* temporary change of basis */ 6343 Mat localChangeOfBasisMatrix; 6344 /* extra space for debugging */ 6345 PetscScalar *dbg_work = NULL; 6346 6347 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6348 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6349 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6350 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6351 /* nonzeros for local mat */ 6352 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6353 if (!pcbddc->benign_change || pcbddc->fake_change) { 6354 for (i=0;i<pcis->n;i++) nnz[i]=1; 6355 } else { 6356 const PetscInt *ii; 6357 PetscInt n; 6358 PetscBool flg_row; 6359 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6360 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6361 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6362 } 6363 for (i=n_vertices;i<total_counts_cc;i++) { 6364 if (PetscBTLookup(change_basis,i)) { 6365 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6366 if (PetscBTLookup(qr_needed_idx,i)) { 6367 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6368 } else { 6369 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6370 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6371 } 6372 } 6373 } 6374 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6375 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6376 ierr = PetscFree(nnz);CHKERRQ(ierr); 6377 /* Set interior change in the matrix */ 6378 if (!pcbddc->benign_change || pcbddc->fake_change) { 6379 for (i=0;i<pcis->n;i++) { 6380 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6381 } 6382 } else { 6383 const PetscInt *ii,*jj; 6384 PetscScalar *aa; 6385 PetscInt n; 6386 PetscBool flg_row; 6387 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6388 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6389 for (i=0;i<n;i++) { 6390 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6391 } 6392 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6393 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6394 } 6395 6396 if (pcbddc->dbg_flag) { 6397 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6398 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6399 } 6400 6401 6402 /* Now we loop on the constraints which need a change of basis */ 6403 /* 6404 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6405 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6406 6407 Basic blocks of change of basis matrix T computed by 6408 6409 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6410 6411 | 1 0 ... 0 s_1/S | 6412 | 0 1 ... 0 s_2/S | 6413 | ... | 6414 | 0 ... 1 s_{n-1}/S | 6415 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6416 6417 with S = \sum_{i=1}^n s_i^2 6418 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6419 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6420 6421 - QR decomposition of constraints otherwise 6422 */ 6423 if (qr_needed && max_size_of_constraint) { 6424 /* space to store Q */ 6425 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6426 /* array to store scaling factors for reflectors */ 6427 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6428 /* first we issue queries for optimal work */ 6429 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6430 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6431 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6432 lqr_work = -1; 6433 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6434 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6435 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6436 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6437 lgqr_work = -1; 6438 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6439 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6440 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6441 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6442 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6443 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6444 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6445 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6446 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6447 /* array to store rhs and solution of triangular solver */ 6448 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6449 /* allocating workspace for check */ 6450 if (pcbddc->dbg_flag) { 6451 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6452 } 6453 } 6454 /* array to store whether a node is primal or not */ 6455 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6456 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6457 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6458 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6459 for (i=0;i<total_primal_vertices;i++) { 6460 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6461 } 6462 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6463 6464 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6465 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6466 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6467 if (PetscBTLookup(change_basis,total_counts)) { 6468 /* get constraint info */ 6469 primal_dofs = constraints_n[total_counts]; 6470 dual_dofs = size_of_constraint-primal_dofs; 6471 6472 if (pcbddc->dbg_flag) { 6473 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6474 } 6475 6476 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6477 6478 /* copy quadrature constraints for change of basis check */ 6479 if (pcbddc->dbg_flag) { 6480 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6481 } 6482 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6483 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6484 6485 /* compute QR decomposition of constraints */ 6486 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6487 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6488 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6489 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6490 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6491 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6492 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6493 6494 /* explictly compute R^-T */ 6495 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6496 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6497 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6498 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6499 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6500 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6502 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6505 6506 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6507 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6508 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6509 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6510 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6511 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6512 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6513 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6514 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6515 6516 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6517 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6518 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6519 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6520 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6521 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6522 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6523 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6524 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6525 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6526 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6527 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6528 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6529 6530 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6531 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6532 /* insert cols for primal dofs */ 6533 for (j=0;j<primal_dofs;j++) { 6534 start_vals = &qr_basis[j*size_of_constraint]; 6535 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6536 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6537 } 6538 /* insert cols for dual dofs */ 6539 for (j=0,k=0;j<dual_dofs;k++) { 6540 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6541 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6542 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6543 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6544 j++; 6545 } 6546 } 6547 6548 /* check change of basis */ 6549 if (pcbddc->dbg_flag) { 6550 PetscInt ii,jj; 6551 PetscBool valid_qr=PETSC_TRUE; 6552 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6553 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6554 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6555 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6556 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6557 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6558 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6559 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6560 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6561 for (jj=0;jj<size_of_constraint;jj++) { 6562 for (ii=0;ii<primal_dofs;ii++) { 6563 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6564 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6565 } 6566 } 6567 if (!valid_qr) { 6568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6569 for (jj=0;jj<size_of_constraint;jj++) { 6570 for (ii=0;ii<primal_dofs;ii++) { 6571 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6572 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6573 } 6574 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6575 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6576 } 6577 } 6578 } 6579 } else { 6580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6581 } 6582 } 6583 } else { /* simple transformation block */ 6584 PetscInt row,col; 6585 PetscScalar val,norm; 6586 6587 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6588 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6589 for (j=0;j<size_of_constraint;j++) { 6590 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6591 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6592 if (!PetscBTLookup(is_primal,row_B)) { 6593 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6594 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6595 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6596 } else { 6597 for (k=0;k<size_of_constraint;k++) { 6598 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6599 if (row != col) { 6600 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6601 } else { 6602 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6603 } 6604 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6605 } 6606 } 6607 } 6608 if (pcbddc->dbg_flag) { 6609 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6610 } 6611 } 6612 } else { 6613 if (pcbddc->dbg_flag) { 6614 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6615 } 6616 } 6617 } 6618 6619 /* free workspace */ 6620 if (qr_needed) { 6621 if (pcbddc->dbg_flag) { 6622 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6623 } 6624 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6625 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6626 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6627 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6628 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6629 } 6630 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6631 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6632 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6633 6634 /* assembling of global change of variable */ 6635 if (!pcbddc->fake_change) { 6636 Mat tmat; 6637 PetscInt bs; 6638 6639 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6640 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6641 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6642 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6643 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6644 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6645 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6646 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6647 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6648 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6649 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6650 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6651 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6652 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6653 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6654 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6655 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6656 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6657 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6658 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6659 6660 /* check */ 6661 if (pcbddc->dbg_flag) { 6662 PetscReal error; 6663 Vec x,x_change; 6664 6665 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6666 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6667 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6668 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6669 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6670 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6671 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6672 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6673 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6674 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6675 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6676 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6677 if (error > PETSC_SMALL) { 6678 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6679 } 6680 ierr = VecDestroy(&x);CHKERRQ(ierr); 6681 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6682 } 6683 /* adapt sub_schurs computed (if any) */ 6684 if (pcbddc->use_deluxe_scaling) { 6685 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6686 6687 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6688 if (sub_schurs && sub_schurs->S_Ej_all) { 6689 Mat S_new,tmat; 6690 IS is_all_N,is_V_Sall = NULL; 6691 6692 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6693 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6694 if (pcbddc->deluxe_zerorows) { 6695 ISLocalToGlobalMapping NtoSall; 6696 IS is_V; 6697 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6698 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6699 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6700 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6701 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6702 } 6703 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6704 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6705 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6706 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6707 if (pcbddc->deluxe_zerorows) { 6708 const PetscScalar *array; 6709 const PetscInt *idxs_V,*idxs_all; 6710 PetscInt i,n_V; 6711 6712 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6713 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6714 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6715 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6716 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6717 for (i=0;i<n_V;i++) { 6718 PetscScalar val; 6719 PetscInt idx; 6720 6721 idx = idxs_V[i]; 6722 val = array[idxs_all[idxs_V[i]]]; 6723 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6724 } 6725 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6726 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6727 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6728 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6729 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6730 } 6731 sub_schurs->S_Ej_all = S_new; 6732 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6733 if (sub_schurs->sum_S_Ej_all) { 6734 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6735 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6736 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6737 if (pcbddc->deluxe_zerorows) { 6738 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6739 } 6740 sub_schurs->sum_S_Ej_all = S_new; 6741 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6742 } 6743 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6744 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6745 } 6746 /* destroy any change of basis context in sub_schurs */ 6747 if (sub_schurs && sub_schurs->change) { 6748 PetscInt i; 6749 6750 for (i=0;i<sub_schurs->n_subs;i++) { 6751 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6752 } 6753 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6754 } 6755 } 6756 if (pcbddc->switch_static) { /* need to save the local change */ 6757 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6758 } else { 6759 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6760 } 6761 /* determine if any process has changed the pressures locally */ 6762 pcbddc->change_interior = pcbddc->benign_have_null; 6763 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6764 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6765 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6766 pcbddc->use_qr_single = qr_needed; 6767 } 6768 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6769 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6770 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6771 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6772 } else { 6773 Mat benign_global = NULL; 6774 if (pcbddc->benign_have_null) { 6775 Mat M; 6776 6777 pcbddc->change_interior = PETSC_TRUE; 6778 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 6779 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 6780 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 6781 if (pcbddc->benign_change) { 6782 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6783 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6784 } else { 6785 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 6786 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 6787 } 6788 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 6789 ierr = MatDestroy(&M);CHKERRQ(ierr); 6790 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6791 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6792 } 6793 if (pcbddc->user_ChangeOfBasisMatrix) { 6794 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6795 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6796 } else if (pcbddc->benign_have_null) { 6797 pcbddc->ChangeOfBasisMatrix = benign_global; 6798 } 6799 } 6800 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6801 IS is_global; 6802 const PetscInt *gidxs; 6803 6804 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6805 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6806 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6807 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6808 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6809 } 6810 } 6811 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6812 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6813 } 6814 6815 if (!pcbddc->fake_change) { 6816 /* add pressure dofs to set of primal nodes for numbering purposes */ 6817 for (i=0;i<pcbddc->benign_n;i++) { 6818 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6819 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6820 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6821 pcbddc->local_primal_size_cc++; 6822 pcbddc->local_primal_size++; 6823 } 6824 6825 /* check if a new primal space has been introduced (also take into account benign trick) */ 6826 pcbddc->new_primal_space_local = PETSC_TRUE; 6827 if (olocal_primal_size == pcbddc->local_primal_size) { 6828 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6829 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6830 if (!pcbddc->new_primal_space_local) { 6831 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6832 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6833 } 6834 } 6835 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6836 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6837 } 6838 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6839 6840 /* flush dbg viewer */ 6841 if (pcbddc->dbg_flag) { 6842 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6843 } 6844 6845 /* free workspace */ 6846 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6847 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6848 if (!pcbddc->adaptive_selection) { 6849 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6850 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6851 } else { 6852 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6853 pcbddc->adaptive_constraints_idxs_ptr, 6854 pcbddc->adaptive_constraints_data_ptr, 6855 pcbddc->adaptive_constraints_idxs, 6856 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6857 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6858 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6859 } 6860 PetscFunctionReturn(0); 6861 } 6862 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6863 6864 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6865 { 6866 ISLocalToGlobalMapping map; 6867 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6868 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6869 PetscInt i,N; 6870 PetscBool rcsr = PETSC_FALSE; 6871 PetscErrorCode ierr; 6872 6873 PetscFunctionBegin; 6874 if (pcbddc->recompute_topography) { 6875 pcbddc->graphanalyzed = PETSC_FALSE; 6876 /* Reset previously computed graph */ 6877 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6878 /* Init local Graph struct */ 6879 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6880 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6881 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6882 6883 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6884 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6885 } 6886 /* Check validity of the csr graph passed in by the user */ 6887 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6888 6889 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6890 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6891 PetscInt *xadj,*adjncy; 6892 PetscInt nvtxs; 6893 PetscBool flg_row=PETSC_FALSE; 6894 6895 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6896 if (flg_row) { 6897 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6898 pcbddc->computed_rowadj = PETSC_TRUE; 6899 } 6900 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6901 rcsr = PETSC_TRUE; 6902 } 6903 if (pcbddc->dbg_flag) { 6904 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6905 } 6906 6907 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6908 PetscReal *lcoords; 6909 PetscInt n; 6910 MPI_Datatype dimrealtype; 6911 6912 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6913 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6914 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6915 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6916 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6917 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6918 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6919 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6920 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6921 6922 pcbddc->mat_graph->coords = lcoords; 6923 pcbddc->mat_graph->cloc = PETSC_TRUE; 6924 pcbddc->mat_graph->cnloc = n; 6925 } 6926 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6927 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6928 6929 /* Setup of Graph */ 6930 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6931 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6932 6933 /* attach info on disconnected subdomains if present */ 6934 if (pcbddc->n_local_subs) { 6935 PetscInt *local_subs; 6936 6937 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6938 for (i=0;i<pcbddc->n_local_subs;i++) { 6939 const PetscInt *idxs; 6940 PetscInt nl,j; 6941 6942 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6943 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6944 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6945 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6946 } 6947 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6948 pcbddc->mat_graph->local_subs = local_subs; 6949 } 6950 } 6951 6952 if (!pcbddc->graphanalyzed) { 6953 /* Graph's connected components analysis */ 6954 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6955 pcbddc->graphanalyzed = PETSC_TRUE; 6956 } 6957 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6958 PetscFunctionReturn(0); 6959 } 6960 6961 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6962 { 6963 PetscInt i,j; 6964 PetscScalar *alphas; 6965 PetscErrorCode ierr; 6966 6967 PetscFunctionBegin; 6968 if (!n) PetscFunctionReturn(0); 6969 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6970 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6971 for (i=1;i<n;i++) { 6972 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6973 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6974 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6975 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6976 } 6977 ierr = PetscFree(alphas);CHKERRQ(ierr); 6978 PetscFunctionReturn(0); 6979 } 6980 6981 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6982 { 6983 Mat A; 6984 PetscInt n_neighs,*neighs,*n_shared,**shared; 6985 PetscMPIInt size,rank,color; 6986 PetscInt *xadj,*adjncy; 6987 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6988 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6989 PetscInt void_procs,*procs_candidates = NULL; 6990 PetscInt xadj_count,*count; 6991 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6992 PetscSubcomm psubcomm; 6993 MPI_Comm subcomm; 6994 PetscErrorCode ierr; 6995 6996 PetscFunctionBegin; 6997 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6998 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6999 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7000 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7001 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7002 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7003 7004 if (have_void) *have_void = PETSC_FALSE; 7005 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7006 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7007 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7008 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7009 im_active = !!n; 7010 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7011 void_procs = size - active_procs; 7012 /* get ranks of of non-active processes in mat communicator */ 7013 if (void_procs) { 7014 PetscInt ncand; 7015 7016 if (have_void) *have_void = PETSC_TRUE; 7017 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7018 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7019 for (i=0,ncand=0;i<size;i++) { 7020 if (!procs_candidates[i]) { 7021 procs_candidates[ncand++] = i; 7022 } 7023 } 7024 /* force n_subdomains to be not greater that the number of non-active processes */ 7025 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7026 } 7027 7028 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7029 number of subdomains requested 1 -> send to master or first candidate in voids */ 7030 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7031 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7032 PetscInt issize,isidx,dest; 7033 if (*n_subdomains == 1) dest = 0; 7034 else dest = rank; 7035 if (im_active) { 7036 issize = 1; 7037 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7038 isidx = procs_candidates[dest]; 7039 } else { 7040 isidx = dest; 7041 } 7042 } else { 7043 issize = 0; 7044 isidx = -1; 7045 } 7046 if (*n_subdomains != 1) *n_subdomains = active_procs; 7047 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7048 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7049 PetscFunctionReturn(0); 7050 } 7051 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7052 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7053 threshold = PetscMax(threshold,2); 7054 7055 /* Get info on mapping */ 7056 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7057 7058 /* build local CSR graph of subdomains' connectivity */ 7059 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7060 xadj[0] = 0; 7061 xadj[1] = PetscMax(n_neighs-1,0); 7062 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7063 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7064 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7065 for (i=1;i<n_neighs;i++) 7066 for (j=0;j<n_shared[i];j++) 7067 count[shared[i][j]] += 1; 7068 7069 xadj_count = 0; 7070 for (i=1;i<n_neighs;i++) { 7071 for (j=0;j<n_shared[i];j++) { 7072 if (count[shared[i][j]] < threshold) { 7073 adjncy[xadj_count] = neighs[i]; 7074 adjncy_wgt[xadj_count] = n_shared[i]; 7075 xadj_count++; 7076 break; 7077 } 7078 } 7079 } 7080 xadj[1] = xadj_count; 7081 ierr = PetscFree(count);CHKERRQ(ierr); 7082 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7083 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7084 7085 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7086 7087 /* Restrict work on active processes only */ 7088 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7089 if (void_procs) { 7090 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7091 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7092 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7093 subcomm = PetscSubcommChild(psubcomm); 7094 } else { 7095 psubcomm = NULL; 7096 subcomm = PetscObjectComm((PetscObject)mat); 7097 } 7098 7099 v_wgt = NULL; 7100 if (!color) { 7101 ierr = PetscFree(xadj);CHKERRQ(ierr); 7102 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7103 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7104 } else { 7105 Mat subdomain_adj; 7106 IS new_ranks,new_ranks_contig; 7107 MatPartitioning partitioner; 7108 PetscInt rstart=0,rend=0; 7109 PetscInt *is_indices,*oldranks; 7110 PetscMPIInt size; 7111 PetscBool aggregate; 7112 7113 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7114 if (void_procs) { 7115 PetscInt prank = rank; 7116 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7117 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7118 for (i=0;i<xadj[1];i++) { 7119 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7120 } 7121 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7122 } else { 7123 oldranks = NULL; 7124 } 7125 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7126 if (aggregate) { /* TODO: all this part could be made more efficient */ 7127 PetscInt lrows,row,ncols,*cols; 7128 PetscMPIInt nrank; 7129 PetscScalar *vals; 7130 7131 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7132 lrows = 0; 7133 if (nrank<redprocs) { 7134 lrows = size/redprocs; 7135 if (nrank<size%redprocs) lrows++; 7136 } 7137 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7138 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7139 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7140 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7141 row = nrank; 7142 ncols = xadj[1]-xadj[0]; 7143 cols = adjncy; 7144 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7145 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7146 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7147 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7148 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7149 ierr = PetscFree(xadj);CHKERRQ(ierr); 7150 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7151 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7152 ierr = PetscFree(vals);CHKERRQ(ierr); 7153 if (use_vwgt) { 7154 Vec v; 7155 const PetscScalar *array; 7156 PetscInt nl; 7157 7158 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7159 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7160 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7161 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7162 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7163 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7164 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7165 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7166 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7167 ierr = VecDestroy(&v);CHKERRQ(ierr); 7168 } 7169 } else { 7170 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7171 if (use_vwgt) { 7172 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7173 v_wgt[0] = n; 7174 } 7175 } 7176 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7177 7178 /* Partition */ 7179 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7180 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7181 if (v_wgt) { 7182 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7183 } 7184 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7185 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7186 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7187 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7188 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7189 7190 /* renumber new_ranks to avoid "holes" in new set of processors */ 7191 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7192 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7193 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7194 if (!aggregate) { 7195 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7196 #if defined(PETSC_USE_DEBUG) 7197 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7198 #endif 7199 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7200 } else if (oldranks) { 7201 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7202 } else { 7203 ranks_send_to_idx[0] = is_indices[0]; 7204 } 7205 } else { 7206 PetscInt idx = 0; 7207 PetscMPIInt tag; 7208 MPI_Request *reqs; 7209 7210 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7211 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7212 for (i=rstart;i<rend;i++) { 7213 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7214 } 7215 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7216 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7217 ierr = PetscFree(reqs);CHKERRQ(ierr); 7218 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7219 #if defined(PETSC_USE_DEBUG) 7220 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7221 #endif 7222 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7223 } else if (oldranks) { 7224 ranks_send_to_idx[0] = oldranks[idx]; 7225 } else { 7226 ranks_send_to_idx[0] = idx; 7227 } 7228 } 7229 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7230 /* clean up */ 7231 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7232 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7233 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7234 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7235 } 7236 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7237 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7238 7239 /* assemble parallel IS for sends */ 7240 i = 1; 7241 if (!color) i=0; 7242 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7243 PetscFunctionReturn(0); 7244 } 7245 7246 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7247 7248 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7249 { 7250 Mat local_mat; 7251 IS is_sends_internal; 7252 PetscInt rows,cols,new_local_rows; 7253 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7254 PetscBool ismatis,isdense,newisdense,destroy_mat; 7255 ISLocalToGlobalMapping l2gmap; 7256 PetscInt* l2gmap_indices; 7257 const PetscInt* is_indices; 7258 MatType new_local_type; 7259 /* buffers */ 7260 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7261 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7262 PetscInt *recv_buffer_idxs_local; 7263 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7264 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7265 /* MPI */ 7266 MPI_Comm comm,comm_n; 7267 PetscSubcomm subcomm; 7268 PetscMPIInt n_sends,n_recvs,size; 7269 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7270 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7271 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7272 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7273 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7274 PetscErrorCode ierr; 7275 7276 PetscFunctionBegin; 7277 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7278 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7279 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7280 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7281 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7282 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7283 PetscValidLogicalCollectiveBool(mat,reuse,6); 7284 PetscValidLogicalCollectiveInt(mat,nis,8); 7285 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7286 if (nvecs) { 7287 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7288 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7289 } 7290 /* further checks */ 7291 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7292 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7293 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7294 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7295 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7296 if (reuse && *mat_n) { 7297 PetscInt mrows,mcols,mnrows,mncols; 7298 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7299 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7300 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7301 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7302 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7303 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7304 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7305 } 7306 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7307 PetscValidLogicalCollectiveInt(mat,bs,0); 7308 7309 /* prepare IS for sending if not provided */ 7310 if (!is_sends) { 7311 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7312 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7313 } else { 7314 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7315 is_sends_internal = is_sends; 7316 } 7317 7318 /* get comm */ 7319 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7320 7321 /* compute number of sends */ 7322 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7323 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7324 7325 /* compute number of receives */ 7326 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7327 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7328 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 7329 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7330 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7331 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7332 ierr = PetscFree(iflags);CHKERRQ(ierr); 7333 7334 /* restrict comm if requested */ 7335 subcomm = 0; 7336 destroy_mat = PETSC_FALSE; 7337 if (restrict_comm) { 7338 PetscMPIInt color,subcommsize; 7339 7340 color = 0; 7341 if (restrict_full) { 7342 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7343 } else { 7344 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7345 } 7346 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7347 subcommsize = size - subcommsize; 7348 /* check if reuse has been requested */ 7349 if (reuse) { 7350 if (*mat_n) { 7351 PetscMPIInt subcommsize2; 7352 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7353 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7354 comm_n = PetscObjectComm((PetscObject)*mat_n); 7355 } else { 7356 comm_n = PETSC_COMM_SELF; 7357 } 7358 } else { /* MAT_INITIAL_MATRIX */ 7359 PetscMPIInt rank; 7360 7361 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7362 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7363 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7364 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7365 comm_n = PetscSubcommChild(subcomm); 7366 } 7367 /* flag to destroy *mat_n if not significative */ 7368 if (color) destroy_mat = PETSC_TRUE; 7369 } else { 7370 comm_n = comm; 7371 } 7372 7373 /* prepare send/receive buffers */ 7374 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7375 ierr = PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7376 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7377 ierr = PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));CHKERRQ(ierr); 7378 if (nis) { 7379 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7380 } 7381 7382 /* Get data from local matrices */ 7383 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7384 /* TODO: See below some guidelines on how to prepare the local buffers */ 7385 /* 7386 send_buffer_vals should contain the raw values of the local matrix 7387 send_buffer_idxs should contain: 7388 - MatType_PRIVATE type 7389 - PetscInt size_of_l2gmap 7390 - PetscInt global_row_indices[size_of_l2gmap] 7391 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7392 */ 7393 else { 7394 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7395 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7396 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7397 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7398 send_buffer_idxs[1] = i; 7399 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7400 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7401 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7402 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7403 for (i=0;i<n_sends;i++) { 7404 ilengths_vals[is_indices[i]] = len*len; 7405 ilengths_idxs[is_indices[i]] = len+2; 7406 } 7407 } 7408 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7409 /* additional is (if any) */ 7410 if (nis) { 7411 PetscMPIInt psum; 7412 PetscInt j; 7413 for (j=0,psum=0;j<nis;j++) { 7414 PetscInt plen; 7415 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7416 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7417 psum += len+1; /* indices + lenght */ 7418 } 7419 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7420 for (j=0,psum=0;j<nis;j++) { 7421 PetscInt plen; 7422 const PetscInt *is_array_idxs; 7423 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7424 send_buffer_idxs_is[psum] = plen; 7425 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7426 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7427 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7428 psum += plen+1; /* indices + lenght */ 7429 } 7430 for (i=0;i<n_sends;i++) { 7431 ilengths_idxs_is[is_indices[i]] = psum; 7432 } 7433 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7434 } 7435 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7436 7437 buf_size_idxs = 0; 7438 buf_size_vals = 0; 7439 buf_size_idxs_is = 0; 7440 buf_size_vecs = 0; 7441 for (i=0;i<n_recvs;i++) { 7442 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7443 buf_size_vals += (PetscInt)olengths_vals[i]; 7444 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7445 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7446 } 7447 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7448 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7449 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7450 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7451 7452 /* get new tags for clean communications */ 7453 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7454 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7455 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7456 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7457 7458 /* allocate for requests */ 7459 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7460 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7461 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7462 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7463 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7464 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7465 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7466 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7467 7468 /* communications */ 7469 ptr_idxs = recv_buffer_idxs; 7470 ptr_vals = recv_buffer_vals; 7471 ptr_idxs_is = recv_buffer_idxs_is; 7472 ptr_vecs = recv_buffer_vecs; 7473 for (i=0;i<n_recvs;i++) { 7474 source_dest = onodes[i]; 7475 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7476 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7477 ptr_idxs += olengths_idxs[i]; 7478 ptr_vals += olengths_vals[i]; 7479 if (nis) { 7480 source_dest = onodes_is[i]; 7481 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7482 ptr_idxs_is += olengths_idxs_is[i]; 7483 } 7484 if (nvecs) { 7485 source_dest = onodes[i]; 7486 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7487 ptr_vecs += olengths_idxs[i]-2; 7488 } 7489 } 7490 for (i=0;i<n_sends;i++) { 7491 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7492 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7493 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7494 if (nis) { 7495 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7496 } 7497 if (nvecs) { 7498 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7499 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7500 } 7501 } 7502 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7503 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7504 7505 /* assemble new l2g map */ 7506 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7507 ptr_idxs = recv_buffer_idxs; 7508 new_local_rows = 0; 7509 for (i=0;i<n_recvs;i++) { 7510 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7511 ptr_idxs += olengths_idxs[i]; 7512 } 7513 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7514 ptr_idxs = recv_buffer_idxs; 7515 new_local_rows = 0; 7516 for (i=0;i<n_recvs;i++) { 7517 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7518 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7519 ptr_idxs += olengths_idxs[i]; 7520 } 7521 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7522 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7523 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7524 7525 /* infer new local matrix type from received local matrices type */ 7526 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7527 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7528 if (n_recvs) { 7529 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7530 ptr_idxs = recv_buffer_idxs; 7531 for (i=0;i<n_recvs;i++) { 7532 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7533 new_local_type_private = MATAIJ_PRIVATE; 7534 break; 7535 } 7536 ptr_idxs += olengths_idxs[i]; 7537 } 7538 switch (new_local_type_private) { 7539 case MATDENSE_PRIVATE: 7540 new_local_type = MATSEQAIJ; 7541 bs = 1; 7542 break; 7543 case MATAIJ_PRIVATE: 7544 new_local_type = MATSEQAIJ; 7545 bs = 1; 7546 break; 7547 case MATBAIJ_PRIVATE: 7548 new_local_type = MATSEQBAIJ; 7549 break; 7550 case MATSBAIJ_PRIVATE: 7551 new_local_type = MATSEQSBAIJ; 7552 break; 7553 default: 7554 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7555 break; 7556 } 7557 } else { /* by default, new_local_type is seqaij */ 7558 new_local_type = MATSEQAIJ; 7559 bs = 1; 7560 } 7561 7562 /* create MATIS object if needed */ 7563 if (!reuse) { 7564 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7565 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7566 } else { 7567 /* it also destroys the local matrices */ 7568 if (*mat_n) { 7569 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7570 } else { /* this is a fake object */ 7571 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7572 } 7573 } 7574 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7575 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7576 7577 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7578 7579 /* Global to local map of received indices */ 7580 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7581 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7582 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7583 7584 /* restore attributes -> type of incoming data and its size */ 7585 buf_size_idxs = 0; 7586 for (i=0;i<n_recvs;i++) { 7587 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7588 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7589 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7590 } 7591 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7592 7593 /* set preallocation */ 7594 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7595 if (!newisdense) { 7596 PetscInt *new_local_nnz=0; 7597 7598 ptr_idxs = recv_buffer_idxs_local; 7599 if (n_recvs) { 7600 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7601 } 7602 for (i=0;i<n_recvs;i++) { 7603 PetscInt j; 7604 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7605 for (j=0;j<*(ptr_idxs+1);j++) { 7606 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7607 } 7608 } else { 7609 /* TODO */ 7610 } 7611 ptr_idxs += olengths_idxs[i]; 7612 } 7613 if (new_local_nnz) { 7614 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7615 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7616 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7617 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7618 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7619 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7620 } else { 7621 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7622 } 7623 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7624 } else { 7625 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7626 } 7627 7628 /* set values */ 7629 ptr_vals = recv_buffer_vals; 7630 ptr_idxs = recv_buffer_idxs_local; 7631 for (i=0;i<n_recvs;i++) { 7632 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7633 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7634 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7635 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7636 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7637 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7638 } else { 7639 /* TODO */ 7640 } 7641 ptr_idxs += olengths_idxs[i]; 7642 ptr_vals += olengths_vals[i]; 7643 } 7644 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7645 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7646 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7647 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7648 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7649 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7650 7651 #if 0 7652 if (!restrict_comm) { /* check */ 7653 Vec lvec,rvec; 7654 PetscReal infty_error; 7655 7656 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7657 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7658 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7659 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7660 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7661 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7662 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7663 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7664 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7665 } 7666 #endif 7667 7668 /* assemble new additional is (if any) */ 7669 if (nis) { 7670 PetscInt **temp_idxs,*count_is,j,psum; 7671 7672 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7673 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7674 ptr_idxs = recv_buffer_idxs_is; 7675 psum = 0; 7676 for (i=0;i<n_recvs;i++) { 7677 for (j=0;j<nis;j++) { 7678 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7679 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7680 psum += plen; 7681 ptr_idxs += plen+1; /* shift pointer to received data */ 7682 } 7683 } 7684 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7685 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7686 for (i=1;i<nis;i++) { 7687 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7688 } 7689 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7690 ptr_idxs = recv_buffer_idxs_is; 7691 for (i=0;i<n_recvs;i++) { 7692 for (j=0;j<nis;j++) { 7693 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7694 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7695 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7696 ptr_idxs += plen+1; /* shift pointer to received data */ 7697 } 7698 } 7699 for (i=0;i<nis;i++) { 7700 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7701 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7702 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7703 } 7704 ierr = PetscFree(count_is);CHKERRQ(ierr); 7705 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7706 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7707 } 7708 /* free workspace */ 7709 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7710 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7711 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7712 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7713 if (isdense) { 7714 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7715 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7716 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7717 } else { 7718 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7719 } 7720 if (nis) { 7721 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7722 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7723 } 7724 7725 if (nvecs) { 7726 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7727 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7728 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7729 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7730 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7731 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7732 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7733 /* set values */ 7734 ptr_vals = recv_buffer_vecs; 7735 ptr_idxs = recv_buffer_idxs_local; 7736 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7737 for (i=0;i<n_recvs;i++) { 7738 PetscInt j; 7739 for (j=0;j<*(ptr_idxs+1);j++) { 7740 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7741 } 7742 ptr_idxs += olengths_idxs[i]; 7743 ptr_vals += olengths_idxs[i]-2; 7744 } 7745 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7746 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7747 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7748 } 7749 7750 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7751 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7752 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7753 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7754 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7755 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7756 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7757 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7758 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7759 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7760 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7761 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7762 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7763 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7764 ierr = PetscFree(onodes);CHKERRQ(ierr); 7765 if (nis) { 7766 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7767 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7768 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7769 } 7770 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7771 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7772 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7773 for (i=0;i<nis;i++) { 7774 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7775 } 7776 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7777 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7778 } 7779 *mat_n = NULL; 7780 } 7781 PetscFunctionReturn(0); 7782 } 7783 7784 /* temporary hack into ksp private data structure */ 7785 #include <petsc/private/kspimpl.h> 7786 7787 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7788 { 7789 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7790 PC_IS *pcis = (PC_IS*)pc->data; 7791 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7792 Mat coarsedivudotp = NULL; 7793 Mat coarseG,t_coarse_mat_is; 7794 MatNullSpace CoarseNullSpace = NULL; 7795 ISLocalToGlobalMapping coarse_islg; 7796 IS coarse_is,*isarray; 7797 PetscInt i,im_active=-1,active_procs=-1; 7798 PetscInt nis,nisdofs,nisneu,nisvert; 7799 PetscInt coarse_eqs_per_proc; 7800 PC pc_temp; 7801 PCType coarse_pc_type; 7802 KSPType coarse_ksp_type; 7803 PetscBool multilevel_requested,multilevel_allowed; 7804 PetscBool coarse_reuse; 7805 PetscInt ncoarse,nedcfield; 7806 PetscBool compute_vecs = PETSC_FALSE; 7807 PetscScalar *array; 7808 MatReuse coarse_mat_reuse; 7809 PetscBool restr, full_restr, have_void; 7810 PetscMPIInt size; 7811 PetscErrorCode ierr; 7812 7813 PetscFunctionBegin; 7814 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 7815 /* Assign global numbering to coarse dofs */ 7816 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7817 PetscInt ocoarse_size; 7818 compute_vecs = PETSC_TRUE; 7819 7820 pcbddc->new_primal_space = PETSC_TRUE; 7821 ocoarse_size = pcbddc->coarse_size; 7822 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7823 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7824 /* see if we can avoid some work */ 7825 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7826 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7827 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7828 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7829 coarse_reuse = PETSC_FALSE; 7830 } else { /* we can safely reuse already computed coarse matrix */ 7831 coarse_reuse = PETSC_TRUE; 7832 } 7833 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7834 coarse_reuse = PETSC_FALSE; 7835 } 7836 /* reset any subassembling information */ 7837 if (!coarse_reuse || pcbddc->recompute_topography) { 7838 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7839 } 7840 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7841 coarse_reuse = PETSC_TRUE; 7842 } 7843 /* assemble coarse matrix */ 7844 if (coarse_reuse && pcbddc->coarse_ksp) { 7845 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7846 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7847 coarse_mat_reuse = MAT_REUSE_MATRIX; 7848 } else { 7849 coarse_mat = NULL; 7850 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7851 } 7852 7853 /* creates temporary l2gmap and IS for coarse indexes */ 7854 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7855 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7856 7857 /* creates temporary MATIS object for coarse matrix */ 7858 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7859 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7860 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7861 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7862 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7863 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7864 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7865 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7866 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7867 7868 /* count "active" (i.e. with positive local size) and "void" processes */ 7869 im_active = !!(pcis->n); 7870 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7871 7872 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7873 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7874 /* full_restr : just use the receivers from the subassembling pattern */ 7875 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7876 coarse_mat_is = NULL; 7877 multilevel_allowed = PETSC_FALSE; 7878 multilevel_requested = PETSC_FALSE; 7879 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7880 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7881 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7882 if (multilevel_requested) { 7883 ncoarse = active_procs/pcbddc->coarsening_ratio; 7884 restr = PETSC_FALSE; 7885 full_restr = PETSC_FALSE; 7886 } else { 7887 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 7888 restr = PETSC_TRUE; 7889 full_restr = PETSC_TRUE; 7890 } 7891 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7892 ncoarse = PetscMax(1,ncoarse); 7893 if (!pcbddc->coarse_subassembling) { 7894 if (pcbddc->coarsening_ratio > 1) { 7895 if (multilevel_requested) { 7896 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7897 } else { 7898 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7899 } 7900 } else { 7901 PetscMPIInt rank; 7902 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7903 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7904 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7905 } 7906 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7907 PetscInt psum; 7908 if (pcbddc->coarse_ksp) psum = 1; 7909 else psum = 0; 7910 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7911 if (ncoarse < size) have_void = PETSC_TRUE; 7912 } 7913 /* determine if we can go multilevel */ 7914 if (multilevel_requested) { 7915 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7916 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7917 } 7918 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7919 7920 /* dump subassembling pattern */ 7921 if (pcbddc->dbg_flag && multilevel_allowed) { 7922 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7923 } 7924 7925 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7926 nedcfield = -1; 7927 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7928 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7929 const PetscInt *idxs; 7930 ISLocalToGlobalMapping tmap; 7931 7932 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7933 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7934 /* allocate space for temporary storage */ 7935 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7936 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7937 /* allocate for IS array */ 7938 nisdofs = pcbddc->n_ISForDofsLocal; 7939 if (pcbddc->nedclocal) { 7940 if (pcbddc->nedfield > -1) { 7941 nedcfield = pcbddc->nedfield; 7942 } else { 7943 nedcfield = 0; 7944 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 7945 nisdofs = 1; 7946 } 7947 } 7948 nisneu = !!pcbddc->NeumannBoundariesLocal; 7949 nisvert = 0; /* nisvert is not used */ 7950 nis = nisdofs + nisneu + nisvert; 7951 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7952 /* dofs splitting */ 7953 for (i=0;i<nisdofs;i++) { 7954 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7955 if (nedcfield != i) { 7956 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7957 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7958 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7959 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7960 } else { 7961 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7962 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7963 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7964 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 7965 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7966 } 7967 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7968 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7969 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7970 } 7971 /* neumann boundaries */ 7972 if (pcbddc->NeumannBoundariesLocal) { 7973 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7974 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7975 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7976 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7977 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7978 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7979 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7980 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7981 } 7982 /* free memory */ 7983 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7984 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7985 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7986 } else { 7987 nis = 0; 7988 nisdofs = 0; 7989 nisneu = 0; 7990 nisvert = 0; 7991 isarray = NULL; 7992 } 7993 /* destroy no longer needed map */ 7994 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7995 7996 /* subassemble */ 7997 if (multilevel_allowed) { 7998 Vec vp[1]; 7999 PetscInt nvecs = 0; 8000 PetscBool reuse,reuser; 8001 8002 if (coarse_mat) reuse = PETSC_TRUE; 8003 else reuse = PETSC_FALSE; 8004 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8005 vp[0] = NULL; 8006 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8007 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8008 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8009 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8010 nvecs = 1; 8011 8012 if (pcbddc->divudotp) { 8013 Mat B,loc_divudotp; 8014 Vec v,p; 8015 IS dummy; 8016 PetscInt np; 8017 8018 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8019 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8020 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8021 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8022 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8023 ierr = VecSet(p,1.);CHKERRQ(ierr); 8024 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8025 ierr = VecDestroy(&p);CHKERRQ(ierr); 8026 ierr = MatDestroy(&B);CHKERRQ(ierr); 8027 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8028 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8029 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8030 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8031 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8032 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8033 ierr = VecDestroy(&v);CHKERRQ(ierr); 8034 } 8035 } 8036 if (reuser) { 8037 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8038 } else { 8039 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8040 } 8041 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8042 PetscScalar *arraym,*arrayv; 8043 PetscInt nl; 8044 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8045 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8046 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8047 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8048 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8049 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8050 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8051 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8052 } else { 8053 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8054 } 8055 } else { 8056 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8057 } 8058 if (coarse_mat_is || coarse_mat) { 8059 PetscMPIInt size; 8060 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8061 if (!multilevel_allowed) { 8062 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8063 } else { 8064 Mat A; 8065 8066 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8067 if (coarse_mat_is) { 8068 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8069 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8070 coarse_mat = coarse_mat_is; 8071 } 8072 /* be sure we don't have MatSeqDENSE as local mat */ 8073 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8074 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8075 } 8076 } 8077 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8078 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8079 8080 /* create local to global scatters for coarse problem */ 8081 if (compute_vecs) { 8082 PetscInt lrows; 8083 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8084 if (coarse_mat) { 8085 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8086 } else { 8087 lrows = 0; 8088 } 8089 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8090 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8091 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8092 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8093 ierr = VecScatterCreateWithData(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8094 } 8095 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8096 8097 /* set defaults for coarse KSP and PC */ 8098 if (multilevel_allowed) { 8099 coarse_ksp_type = KSPRICHARDSON; 8100 coarse_pc_type = PCBDDC; 8101 } else { 8102 coarse_ksp_type = KSPPREONLY; 8103 coarse_pc_type = PCREDUNDANT; 8104 } 8105 8106 /* print some info if requested */ 8107 if (pcbddc->dbg_flag) { 8108 if (!multilevel_allowed) { 8109 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8110 if (multilevel_requested) { 8111 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8112 } else if (pcbddc->max_levels) { 8113 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8114 } 8115 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8116 } 8117 } 8118 8119 /* communicate coarse discrete gradient */ 8120 coarseG = NULL; 8121 if (pcbddc->nedcG && multilevel_allowed) { 8122 MPI_Comm ccomm; 8123 if (coarse_mat) { 8124 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8125 } else { 8126 ccomm = MPI_COMM_NULL; 8127 } 8128 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8129 } 8130 8131 /* create the coarse KSP object only once with defaults */ 8132 if (coarse_mat) { 8133 PetscBool isredundant,isnn,isbddc; 8134 PetscViewer dbg_viewer = NULL; 8135 8136 if (pcbddc->dbg_flag) { 8137 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8138 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8139 } 8140 if (!pcbddc->coarse_ksp) { 8141 char prefix[256],str_level[16]; 8142 size_t len; 8143 8144 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8145 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8146 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8147 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8148 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8149 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8150 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8151 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8152 /* TODO is this logic correct? should check for coarse_mat type */ 8153 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8154 /* prefix */ 8155 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8156 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8157 if (!pcbddc->current_level) { 8158 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8159 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8160 } else { 8161 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8162 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8163 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8164 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8165 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8166 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8167 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8168 } 8169 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8170 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8171 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8172 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8173 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8174 /* allow user customization */ 8175 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8176 /* get some info after set from options */ 8177 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8178 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8179 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8180 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8181 if (multilevel_allowed && !isbddc && !isnn) { 8182 isbddc = PETSC_TRUE; 8183 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8184 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8185 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8186 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8187 } 8188 } 8189 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8190 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8191 if (nisdofs) { 8192 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8193 for (i=0;i<nisdofs;i++) { 8194 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8195 } 8196 } 8197 if (nisneu) { 8198 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8199 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8200 } 8201 if (nisvert) { 8202 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8203 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8204 } 8205 if (coarseG) { 8206 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8207 } 8208 8209 /* get some info after set from options */ 8210 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8211 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8212 if (isbddc && !multilevel_allowed) { 8213 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8214 isbddc = PETSC_FALSE; 8215 } 8216 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8217 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8218 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8219 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8220 isbddc = PETSC_TRUE; 8221 } 8222 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8223 if (isredundant) { 8224 KSP inner_ksp; 8225 PC inner_pc; 8226 8227 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8228 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8229 } 8230 8231 /* parameters which miss an API */ 8232 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8233 if (isbddc) { 8234 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8235 8236 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8237 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8238 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8239 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8240 if (pcbddc_coarse->benign_saddle_point) { 8241 Mat coarsedivudotp_is; 8242 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8243 IS row,col; 8244 const PetscInt *gidxs; 8245 PetscInt n,st,M,N; 8246 8247 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8248 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8249 st = st-n; 8250 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8251 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8252 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8253 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8254 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8255 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8256 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8257 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8258 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8259 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8260 ierr = ISDestroy(&row);CHKERRQ(ierr); 8261 ierr = ISDestroy(&col);CHKERRQ(ierr); 8262 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8263 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8264 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8265 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8266 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8267 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8268 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8269 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8270 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8271 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8272 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8273 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8274 } 8275 } 8276 8277 /* propagate symmetry info of coarse matrix */ 8278 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8279 if (pc->pmat->symmetric_set) { 8280 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8281 } 8282 if (pc->pmat->hermitian_set) { 8283 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8284 } 8285 if (pc->pmat->spd_set) { 8286 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8287 } 8288 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8289 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8290 } 8291 /* set operators */ 8292 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8293 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8294 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8295 if (pcbddc->dbg_flag) { 8296 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8297 } 8298 } 8299 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8300 ierr = PetscFree(isarray);CHKERRQ(ierr); 8301 #if 0 8302 { 8303 PetscViewer viewer; 8304 char filename[256]; 8305 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8306 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8307 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8308 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8309 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8310 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8311 } 8312 #endif 8313 8314 if (pcbddc->coarse_ksp) { 8315 Vec crhs,csol; 8316 8317 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8318 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8319 if (!csol) { 8320 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8321 } 8322 if (!crhs) { 8323 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8324 } 8325 } 8326 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8327 8328 /* compute null space for coarse solver if the benign trick has been requested */ 8329 if (pcbddc->benign_null) { 8330 8331 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8332 for (i=0;i<pcbddc->benign_n;i++) { 8333 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8334 } 8335 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8336 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8337 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8338 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8339 if (coarse_mat) { 8340 Vec nullv; 8341 PetscScalar *array,*array2; 8342 PetscInt nl; 8343 8344 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8345 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8346 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8347 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8348 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8349 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8350 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8351 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8352 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8353 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8354 } 8355 } 8356 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8357 8358 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8359 if (pcbddc->coarse_ksp) { 8360 PetscBool ispreonly; 8361 8362 if (CoarseNullSpace) { 8363 PetscBool isnull; 8364 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8365 if (isnull) { 8366 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8367 } 8368 /* TODO: add local nullspaces (if any) */ 8369 } 8370 /* setup coarse ksp */ 8371 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8372 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8373 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8374 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8375 KSP check_ksp; 8376 KSPType check_ksp_type; 8377 PC check_pc; 8378 Vec check_vec,coarse_vec; 8379 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8380 PetscInt its; 8381 PetscBool compute_eigs; 8382 PetscReal *eigs_r,*eigs_c; 8383 PetscInt neigs; 8384 const char *prefix; 8385 8386 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8387 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8388 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8389 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8390 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8391 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8392 /* prevent from setup unneeded object */ 8393 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8394 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8395 if (ispreonly) { 8396 check_ksp_type = KSPPREONLY; 8397 compute_eigs = PETSC_FALSE; 8398 } else { 8399 check_ksp_type = KSPGMRES; 8400 compute_eigs = PETSC_TRUE; 8401 } 8402 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8403 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8404 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8405 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8406 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8407 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8408 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8409 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8410 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8411 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8412 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8413 /* create random vec */ 8414 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8415 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8416 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8417 /* solve coarse problem */ 8418 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8419 /* set eigenvalue estimation if preonly has not been requested */ 8420 if (compute_eigs) { 8421 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8422 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8423 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8424 if (neigs) { 8425 lambda_max = eigs_r[neigs-1]; 8426 lambda_min = eigs_r[0]; 8427 if (pcbddc->use_coarse_estimates) { 8428 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8429 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8430 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8431 } 8432 } 8433 } 8434 } 8435 8436 /* check coarse problem residual error */ 8437 if (pcbddc->dbg_flag) { 8438 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8439 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8440 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8441 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8442 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8443 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8444 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8445 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8446 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8447 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8449 if (CoarseNullSpace) { 8450 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8451 } 8452 if (compute_eigs) { 8453 PetscReal lambda_max_s,lambda_min_s; 8454 KSPConvergedReason reason; 8455 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8456 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8457 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8458 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8459 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8460 for (i=0;i<neigs;i++) { 8461 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8462 } 8463 } 8464 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8465 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8466 } 8467 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8468 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8469 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8470 if (compute_eigs) { 8471 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8472 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8473 } 8474 } 8475 } 8476 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8477 /* print additional info */ 8478 if (pcbddc->dbg_flag) { 8479 /* waits until all processes reaches this point */ 8480 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8481 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8482 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8483 } 8484 8485 /* free memory */ 8486 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8487 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8488 PetscFunctionReturn(0); 8489 } 8490 8491 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8492 { 8493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8494 PC_IS* pcis = (PC_IS*)pc->data; 8495 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8496 IS subset,subset_mult,subset_n; 8497 PetscInt local_size,coarse_size=0; 8498 PetscInt *local_primal_indices=NULL; 8499 const PetscInt *t_local_primal_indices; 8500 PetscErrorCode ierr; 8501 8502 PetscFunctionBegin; 8503 /* Compute global number of coarse dofs */ 8504 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8506 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8507 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8509 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8510 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8511 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8512 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8513 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8514 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8515 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8516 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8517 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8518 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8519 8520 /* check numbering */ 8521 if (pcbddc->dbg_flag) { 8522 PetscScalar coarsesum,*array,*array2; 8523 PetscInt i; 8524 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8525 8526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8527 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8529 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8530 /* counter */ 8531 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8532 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8533 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8534 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8535 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8536 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8537 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8538 for (i=0;i<pcbddc->local_primal_size;i++) { 8539 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8540 } 8541 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8542 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8543 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8544 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8545 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8546 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8547 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8548 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8549 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8550 for (i=0;i<pcis->n;i++) { 8551 if (array[i] != 0.0 && array[i] != array2[i]) { 8552 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8553 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8554 set_error = PETSC_TRUE; 8555 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8557 } 8558 } 8559 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8560 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8562 for (i=0;i<pcis->n;i++) { 8563 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8564 } 8565 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8566 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8567 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8568 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8569 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8571 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8572 PetscInt *gidxs; 8573 8574 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8575 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8579 for (i=0;i<pcbddc->local_primal_size;i++) { 8580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8581 } 8582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8583 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8584 } 8585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8586 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8587 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8588 } 8589 8590 /* get back data */ 8591 *coarse_size_n = coarse_size; 8592 *local_primal_indices_n = local_primal_indices; 8593 PetscFunctionReturn(0); 8594 } 8595 8596 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8597 { 8598 IS localis_t; 8599 PetscInt i,lsize,*idxs,n; 8600 PetscScalar *vals; 8601 PetscErrorCode ierr; 8602 8603 PetscFunctionBegin; 8604 /* get indices in local ordering exploiting local to global map */ 8605 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8606 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8607 for (i=0;i<lsize;i++) vals[i] = 1.0; 8608 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8609 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8610 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8611 if (idxs) { /* multilevel guard */ 8612 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8613 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8614 } 8615 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8616 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8617 ierr = PetscFree(vals);CHKERRQ(ierr); 8618 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8619 /* now compute set in local ordering */ 8620 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8621 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8622 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8623 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8624 for (i=0,lsize=0;i<n;i++) { 8625 if (PetscRealPart(vals[i]) > 0.5) { 8626 lsize++; 8627 } 8628 } 8629 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8630 for (i=0,lsize=0;i<n;i++) { 8631 if (PetscRealPart(vals[i]) > 0.5) { 8632 idxs[lsize++] = i; 8633 } 8634 } 8635 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8637 *localis = localis_t; 8638 PetscFunctionReturn(0); 8639 } 8640 8641 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8642 { 8643 PC_IS *pcis=(PC_IS*)pc->data; 8644 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8645 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8646 Mat S_j; 8647 PetscInt *used_xadj,*used_adjncy; 8648 PetscBool free_used_adj; 8649 PetscErrorCode ierr; 8650 8651 PetscFunctionBegin; 8652 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8653 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8654 free_used_adj = PETSC_FALSE; 8655 if (pcbddc->sub_schurs_layers == -1) { 8656 used_xadj = NULL; 8657 used_adjncy = NULL; 8658 } else { 8659 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8660 used_xadj = pcbddc->mat_graph->xadj; 8661 used_adjncy = pcbddc->mat_graph->adjncy; 8662 } else if (pcbddc->computed_rowadj) { 8663 used_xadj = pcbddc->mat_graph->xadj; 8664 used_adjncy = pcbddc->mat_graph->adjncy; 8665 } else { 8666 PetscBool flg_row=PETSC_FALSE; 8667 const PetscInt *xadj,*adjncy; 8668 PetscInt nvtxs; 8669 8670 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8671 if (flg_row) { 8672 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8673 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8674 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8675 free_used_adj = PETSC_TRUE; 8676 } else { 8677 pcbddc->sub_schurs_layers = -1; 8678 used_xadj = NULL; 8679 used_adjncy = NULL; 8680 } 8681 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8682 } 8683 } 8684 8685 /* setup sub_schurs data */ 8686 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8687 if (!sub_schurs->schur_explicit) { 8688 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8689 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8690 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8691 } else { 8692 Mat change = NULL; 8693 Vec scaling = NULL; 8694 IS change_primal = NULL, iP; 8695 PetscInt benign_n; 8696 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8697 PetscBool isseqaij,need_change = PETSC_FALSE; 8698 PetscBool discrete_harmonic = PETSC_FALSE; 8699 8700 if (!pcbddc->use_vertices && reuse_solvers) { 8701 PetscInt n_vertices; 8702 8703 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8704 reuse_solvers = (PetscBool)!n_vertices; 8705 } 8706 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8707 if (!isseqaij) { 8708 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8709 if (matis->A == pcbddc->local_mat) { 8710 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8711 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8712 } else { 8713 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8714 } 8715 } 8716 if (!pcbddc->benign_change_explicit) { 8717 benign_n = pcbddc->benign_n; 8718 } else { 8719 benign_n = 0; 8720 } 8721 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8722 We need a global reduction to avoid possible deadlocks. 8723 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8724 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8725 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8726 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8727 need_change = (PetscBool)(!need_change); 8728 } 8729 /* If the user defines additional constraints, we import them here. 8730 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8731 if (need_change) { 8732 PC_IS *pcisf; 8733 PC_BDDC *pcbddcf; 8734 PC pcf; 8735 8736 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8737 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8738 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8739 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8740 8741 /* hacks */ 8742 pcisf = (PC_IS*)pcf->data; 8743 pcisf->is_B_local = pcis->is_B_local; 8744 pcisf->vec1_N = pcis->vec1_N; 8745 pcisf->BtoNmap = pcis->BtoNmap; 8746 pcisf->n = pcis->n; 8747 pcisf->n_B = pcis->n_B; 8748 pcbddcf = (PC_BDDC*)pcf->data; 8749 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8750 pcbddcf->mat_graph = pcbddc->mat_graph; 8751 pcbddcf->use_faces = PETSC_TRUE; 8752 pcbddcf->use_change_of_basis = PETSC_TRUE; 8753 pcbddcf->use_change_on_faces = PETSC_TRUE; 8754 pcbddcf->use_qr_single = PETSC_TRUE; 8755 pcbddcf->fake_change = PETSC_TRUE; 8756 8757 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8758 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8759 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8760 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8761 change = pcbddcf->ConstraintMatrix; 8762 pcbddcf->ConstraintMatrix = NULL; 8763 8764 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8765 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8766 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8767 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8768 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8769 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8770 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8771 pcf->ops->destroy = NULL; 8772 pcf->ops->reset = NULL; 8773 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8774 } 8775 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8776 8777 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8778 if (iP) { 8779 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8780 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8781 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8782 } 8783 if (discrete_harmonic) { 8784 Mat A; 8785 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8786 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8787 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8788 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8789 ierr = MatDestroy(&A);CHKERRQ(ierr); 8790 } else { 8791 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8792 } 8793 ierr = MatDestroy(&change);CHKERRQ(ierr); 8794 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8795 } 8796 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8797 8798 /* free adjacency */ 8799 if (free_used_adj) { 8800 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8801 } 8802 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8803 PetscFunctionReturn(0); 8804 } 8805 8806 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8807 { 8808 PC_IS *pcis=(PC_IS*)pc->data; 8809 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8810 PCBDDCGraph graph; 8811 PetscErrorCode ierr; 8812 8813 PetscFunctionBegin; 8814 /* attach interface graph for determining subsets */ 8815 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8816 IS verticesIS,verticescomm; 8817 PetscInt vsize,*idxs; 8818 8819 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8820 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8821 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8822 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8823 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8824 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8825 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8826 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8827 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8828 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8829 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8830 } else { 8831 graph = pcbddc->mat_graph; 8832 } 8833 /* print some info */ 8834 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8835 IS vertices; 8836 PetscInt nv,nedges,nfaces; 8837 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8838 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8839 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8840 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 8843 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 8844 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8845 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8846 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8847 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8848 } 8849 8850 /* sub_schurs init */ 8851 if (!pcbddc->sub_schurs) { 8852 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8853 } 8854 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8855 8856 /* free graph struct */ 8857 if (pcbddc->sub_schurs_rebuild) { 8858 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8859 } 8860 PetscFunctionReturn(0); 8861 } 8862 8863 PetscErrorCode PCBDDCCheckOperator(PC pc) 8864 { 8865 PC_IS *pcis=(PC_IS*)pc->data; 8866 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8867 PetscErrorCode ierr; 8868 8869 PetscFunctionBegin; 8870 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8871 IS zerodiag = NULL; 8872 Mat S_j,B0_B=NULL; 8873 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8874 PetscScalar *p0_check,*array,*array2; 8875 PetscReal norm; 8876 PetscInt i; 8877 8878 /* B0 and B0_B */ 8879 if (zerodiag) { 8880 IS dummy; 8881 8882 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8883 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8884 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8885 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8886 } 8887 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8888 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8889 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8890 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8891 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8892 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8893 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8894 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8895 /* S_j */ 8896 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8897 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8898 8899 /* mimic vector in \widetilde{W}_\Gamma */ 8900 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8901 /* continuous in primal space */ 8902 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8903 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8904 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8905 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8906 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8907 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8908 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8909 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8910 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8911 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8912 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8913 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8914 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8915 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8916 8917 /* assemble rhs for coarse problem */ 8918 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8919 /* local with Schur */ 8920 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8921 if (zerodiag) { 8922 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8923 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8924 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8925 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8926 } 8927 /* sum on primal nodes the local contributions */ 8928 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8929 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8930 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8931 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8932 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8933 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8934 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8935 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8936 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8937 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8938 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8939 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8940 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8941 /* scale primal nodes (BDDC sums contibutions) */ 8942 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8943 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8944 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8945 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8946 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8947 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8948 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8949 /* global: \widetilde{B0}_B w_\Gamma */ 8950 if (zerodiag) { 8951 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8952 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8953 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8954 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8955 } 8956 /* BDDC */ 8957 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8958 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8959 8960 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8961 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8962 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8963 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 8964 for (i=0;i<pcbddc->benign_n;i++) { 8965 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 8966 } 8967 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8968 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8969 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8970 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8971 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8972 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8973 } 8974 PetscFunctionReturn(0); 8975 } 8976 8977 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8978 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8979 { 8980 Mat At; 8981 IS rows; 8982 PetscInt rst,ren; 8983 PetscErrorCode ierr; 8984 PetscLayout rmap; 8985 8986 PetscFunctionBegin; 8987 rst = ren = 0; 8988 if (ccomm != MPI_COMM_NULL) { 8989 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8990 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8991 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8992 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8993 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8994 } 8995 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8996 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8997 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8998 8999 if (ccomm != MPI_COMM_NULL) { 9000 Mat_MPIAIJ *a,*b; 9001 IS from,to; 9002 Vec gvec; 9003 PetscInt lsize; 9004 9005 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9006 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9007 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9008 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9009 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9010 a = (Mat_MPIAIJ*)At->data; 9011 b = (Mat_MPIAIJ*)(*B)->data; 9012 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9013 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9014 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9015 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9016 b->A = a->A; 9017 b->B = a->B; 9018 9019 b->donotstash = a->donotstash; 9020 b->roworiented = a->roworiented; 9021 b->rowindices = 0; 9022 b->rowvalues = 0; 9023 b->getrowactive = PETSC_FALSE; 9024 9025 (*B)->rmap = rmap; 9026 (*B)->factortype = A->factortype; 9027 (*B)->assembled = PETSC_TRUE; 9028 (*B)->insertmode = NOT_SET_VALUES; 9029 (*B)->preallocated = PETSC_TRUE; 9030 9031 if (a->colmap) { 9032 #if defined(PETSC_USE_CTABLE) 9033 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9034 #else 9035 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9036 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9037 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9038 #endif 9039 } else b->colmap = 0; 9040 if (a->garray) { 9041 PetscInt len; 9042 len = a->B->cmap->n; 9043 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9044 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9045 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9046 } else b->garray = 0; 9047 9048 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9049 b->lvec = a->lvec; 9050 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9051 9052 /* cannot use VecScatterCopy */ 9053 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9054 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9055 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9056 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9057 ierr = VecScatterCreateWithData(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9058 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9059 ierr = ISDestroy(&from);CHKERRQ(ierr); 9060 ierr = ISDestroy(&to);CHKERRQ(ierr); 9061 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9062 } 9063 ierr = MatDestroy(&At);CHKERRQ(ierr); 9064 PetscFunctionReturn(0); 9065 } 9066