1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 458 459 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 460 for proper detection of coarse edges' endpoints */ 461 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 462 for (i=0;i<ne;i++) { 463 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 464 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 465 } 466 } 467 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 468 if (!conforming) { 469 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 470 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 471 } 472 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 473 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 474 cum = 0; 475 for (i=0;i<ne;i++) { 476 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 477 if (!PetscBTLookup(btee,i)) { 478 marks[cum++] = i; 479 continue; 480 } 481 /* set badly connected edge dofs as primal */ 482 if (!conforming) { 483 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 484 marks[cum++] = i; 485 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 486 for (j=ii[i];j<ii[i+1];j++) { 487 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 488 } 489 } else { 490 /* every edge dofs should be connected trough a certain number of nodal dofs 491 to other edge dofs belonging to coarse edges 492 - at most 2 endpoints 493 - order-1 interior nodal dofs 494 - no undefined nodal dofs (nconn < order) 495 */ 496 PetscInt ends = 0,ints = 0, undef = 0; 497 for (j=ii[i];j<ii[i+1];j++) { 498 PetscInt v = jj[j],k; 499 PetscInt nconn = iit[v+1]-iit[v]; 500 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 501 if (nconn > order) ends++; 502 else if (nconn == order) ints++; 503 else undef++; 504 } 505 if (undef || ends > 2 || ints != order -1) { 506 marks[cum++] = i; 507 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 508 for (j=ii[i];j<ii[i+1];j++) { 509 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 510 } 511 } 512 } 513 } 514 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 515 if (!order && ii[i+1] != ii[i]) { 516 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 517 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 518 } 519 } 520 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 521 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 522 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 523 if (!conforming) { 524 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 525 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 526 } 527 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 528 529 /* identify splitpoints and corner candidates */ 530 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 531 if (print) { 532 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 533 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 534 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 535 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 536 } 537 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 538 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 539 for (i=0;i<nv;i++) { 540 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 541 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 542 if (!order) { /* variable order */ 543 PetscReal vorder = 0.; 544 545 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 546 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 547 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 548 ord = 1; 549 } 550 #if defined(PETSC_USE_DEBUG) 551 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 552 #endif 553 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 554 if (PetscBTLookup(btbd,jj[j])) { 555 bdir = PETSC_TRUE; 556 break; 557 } 558 if (vc != ecount[jj[j]]) { 559 sneighs = PETSC_FALSE; 560 } else { 561 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 562 for (k=0;k<vc;k++) { 563 if (vn[k] != en[k]) { 564 sneighs = PETSC_FALSE; 565 break; 566 } 567 } 568 } 569 } 570 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 571 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 572 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 573 } else if (test == ord) { 574 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 576 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 577 } else { 578 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 579 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 580 } 581 } 582 } 583 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 584 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 585 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 586 587 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 588 if (order != 1) { 589 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 590 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 591 for (i=0;i<nv;i++) { 592 if (PetscBTLookup(btvcand,i)) { 593 PetscBool found = PETSC_FALSE; 594 for (j=ii[i];j<ii[i+1] && !found;j++) { 595 PetscInt k,e = jj[j]; 596 if (PetscBTLookup(bte,e)) continue; 597 for (k=iit[e];k<iit[e+1];k++) { 598 PetscInt v = jjt[k]; 599 if (v != i && PetscBTLookup(btvcand,v)) { 600 found = PETSC_TRUE; 601 break; 602 } 603 } 604 } 605 if (!found) { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 607 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 608 } else { 609 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 610 } 611 } 612 } 613 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 614 } 615 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 616 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 617 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 618 619 /* Get the local G^T explicitly */ 620 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 621 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 622 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 623 624 /* Mark interior nodal dofs */ 625 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 626 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 627 for (i=1;i<n_neigh;i++) { 628 for (j=0;j<n_shared[i];j++) { 629 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 630 } 631 } 632 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 633 634 /* communicate corners and splitpoints */ 635 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 636 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 638 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 639 640 if (print) { 641 IS tbz; 642 643 cum = 0; 644 for (i=0;i<nv;i++) 645 if (sfvleaves[i]) 646 vmarks[cum++] = i; 647 648 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 649 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 650 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 651 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 652 } 653 654 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 655 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 656 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 657 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 658 659 /* Zero rows of lGt corresponding to identified corners 660 and interior nodal dofs */ 661 cum = 0; 662 for (i=0;i<nv;i++) { 663 if (sfvleaves[i]) { 664 vmarks[cum++] = i; 665 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 666 } 667 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 668 } 669 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 670 if (print) { 671 IS tbz; 672 673 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 674 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 675 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 676 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 677 } 678 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 679 ierr = PetscFree(vmarks);CHKERRQ(ierr); 680 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 681 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 682 683 /* Recompute G */ 684 ierr = MatDestroy(&lG);CHKERRQ(ierr); 685 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 686 if (print) { 687 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 688 ierr = MatView(lG,NULL);CHKERRQ(ierr); 689 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 690 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 691 } 692 693 /* Get primal dofs (if any) */ 694 cum = 0; 695 for (i=0;i<ne;i++) { 696 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 697 } 698 if (fl2g) { 699 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 700 } 701 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 702 if (print) { 703 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 704 ierr = ISView(primals,NULL);CHKERRQ(ierr); 705 } 706 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 707 /* TODO: what if the user passed in some of them ? */ 708 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 709 ierr = ISDestroy(&primals);CHKERRQ(ierr); 710 711 /* Compute edge connectivity */ 712 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 713 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 714 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 715 if (fl2g) { 716 PetscBT btf; 717 PetscInt *iia,*jja,*iiu,*jju; 718 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 719 720 /* create CSR for all local dofs */ 721 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 722 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 723 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 724 iiu = pcbddc->mat_graph->xadj; 725 jju = pcbddc->mat_graph->adjncy; 726 } else if (pcbddc->use_local_adj) { 727 rest = PETSC_TRUE; 728 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 729 } else { 730 free = PETSC_TRUE; 731 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 732 iiu[0] = 0; 733 for (i=0;i<n;i++) { 734 iiu[i+1] = i+1; 735 jju[i] = -1; 736 } 737 } 738 739 /* import sizes of CSR */ 740 iia[0] = 0; 741 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 742 743 /* overwrite entries corresponding to the Nedelec field */ 744 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 745 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 746 for (i=0;i<ne;i++) { 747 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 748 iia[idxs[i]+1] = ii[i+1]-ii[i]; 749 } 750 751 /* iia in CSR */ 752 for (i=0;i<n;i++) iia[i+1] += iia[i]; 753 754 /* jja in CSR */ 755 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 756 for (i=0;i<n;i++) 757 if (!PetscBTLookup(btf,i)) 758 for (j=0;j<iiu[i+1]-iiu[i];j++) 759 jja[iia[i]+j] = jju[iiu[i]+j]; 760 761 /* map edge dofs connectivity */ 762 if (jj) { 763 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 764 for (i=0;i<ne;i++) { 765 PetscInt e = idxs[i]; 766 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 767 } 768 } 769 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 770 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 771 if (rest) { 772 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 773 } 774 if (free) { 775 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 776 } 777 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 778 } else { 779 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 780 } 781 782 /* Analyze interface for edge dofs */ 783 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 784 pcbddc->mat_graph->twodim = PETSC_FALSE; 785 786 /* Get coarse edges in the edge space */ 787 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 788 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 789 790 if (fl2g) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 792 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 793 for (i=0;i<nee;i++) { 794 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 795 } 796 } else { 797 eedges = alleedges; 798 primals = allprimals; 799 } 800 801 /* Mark fine edge dofs with their coarse edge id */ 802 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 803 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 804 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 805 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 806 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 807 if (print) { 808 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 809 ierr = ISView(primals,NULL);CHKERRQ(ierr); 810 } 811 812 maxsize = 0; 813 for (i=0;i<nee;i++) { 814 PetscInt size,mark = i+1; 815 816 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 817 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 818 for (j=0;j<size;j++) marks[idxs[j]] = mark; 819 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 820 maxsize = PetscMax(maxsize,size); 821 } 822 823 /* Find coarse edge endpoints */ 824 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 825 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 826 for (i=0;i<nee;i++) { 827 PetscInt mark = i+1,size; 828 829 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 830 if (!size && nedfieldlocal) continue; 831 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 832 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 833 if (print) { 834 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 835 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 836 } 837 for (j=0;j<size;j++) { 838 PetscInt k, ee = idxs[j]; 839 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 840 for (k=ii[ee];k<ii[ee+1];k++) { 841 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 842 if (PetscBTLookup(btv,jj[k])) { 843 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 844 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 845 PetscInt k2; 846 PetscBool corner = PETSC_FALSE; 847 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 848 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 849 /* it's a corner if either is connected with an edge dof belonging to a different cc or 850 if the edge dof lie on the natural part of the boundary */ 851 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 852 corner = PETSC_TRUE; 853 break; 854 } 855 } 856 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 857 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 858 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 859 } else { 860 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 861 } 862 } 863 } 864 } 865 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 866 } 867 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 868 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 869 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 870 871 /* Reset marked primal dofs */ 872 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 873 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 874 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 875 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 876 877 /* Now use the initial lG */ 878 ierr = MatDestroy(&lG);CHKERRQ(ierr); 879 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 880 lG = lGinit; 881 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 882 883 /* Compute extended cols indices */ 884 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 885 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 886 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 887 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 888 i *= maxsize; 889 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 890 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 891 eerr = PETSC_FALSE; 892 for (i=0;i<nee;i++) { 893 PetscInt size,found = 0; 894 895 cum = 0; 896 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 897 if (!size && nedfieldlocal) continue; 898 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 899 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 901 for (j=0;j<size;j++) { 902 PetscInt k,ee = idxs[j]; 903 for (k=ii[ee];k<ii[ee+1];k++) { 904 PetscInt vv = jj[k]; 905 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 906 else if (!PetscBTLookupSet(btvc,vv)) found++; 907 } 908 } 909 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 910 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 911 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 912 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 914 /* it may happen that endpoints are not defined at this point 915 if it is the case, mark this edge for a second pass */ 916 if (cum != size -1 || found != 2) { 917 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 918 if (print) { 919 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 920 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 921 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 922 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 923 } 924 eerr = PETSC_TRUE; 925 } 926 } 927 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 928 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 929 if (done) { 930 PetscInt *newprimals; 931 932 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 933 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 934 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 935 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 936 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 937 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 938 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 939 for (i=0;i<nee;i++) { 940 PetscBool has_candidates = PETSC_FALSE; 941 if (PetscBTLookup(bter,i)) { 942 PetscInt size,mark = i+1; 943 944 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 945 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 946 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 947 for (j=0;j<size;j++) { 948 PetscInt k,ee = idxs[j]; 949 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 950 for (k=ii[ee];k<ii[ee+1];k++) { 951 /* set all candidates located on the edge as corners */ 952 if (PetscBTLookup(btvcand,jj[k])) { 953 PetscInt k2,vv = jj[k]; 954 has_candidates = PETSC_TRUE; 955 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 956 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 957 /* set all edge dofs connected to candidate as primals */ 958 for (k2=iit[vv];k2<iit[vv+1];k2++) { 959 if (marks[jjt[k2]] == mark) { 960 PetscInt k3,ee2 = jjt[k2]; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 962 newprimals[cum++] = ee2; 963 /* finally set the new corners */ 964 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 966 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 967 } 968 } 969 } 970 } else { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 972 } 973 } 974 } 975 if (!has_candidates) { /* circular edge */ 976 PetscInt k, ee = idxs[0],*tmarks; 977 978 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 979 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 980 for (k=ii[ee];k<ii[ee+1];k++) { 981 PetscInt k2; 982 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 983 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 984 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 985 } 986 for (j=0;j<size;j++) { 987 if (tmarks[idxs[j]] > 1) { 988 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 989 newprimals[cum++] = idxs[j]; 990 } 991 } 992 ierr = PetscFree(tmarks);CHKERRQ(ierr); 993 } 994 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 995 } 996 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 997 } 998 ierr = PetscFree(extcols);CHKERRQ(ierr); 999 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1000 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1001 if (fl2g) { 1002 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1003 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1004 for (i=0;i<nee;i++) { 1005 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(eedges);CHKERRQ(ierr); 1008 } 1009 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1010 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1011 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1012 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1013 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1014 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1015 pcbddc->mat_graph->twodim = PETSC_FALSE; 1016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1017 if (fl2g) { 1018 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1020 for (i=0;i<nee;i++) { 1021 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1022 } 1023 } else { 1024 eedges = alleedges; 1025 primals = allprimals; 1026 } 1027 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1028 1029 /* Mark again */ 1030 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1031 for (i=0;i<nee;i++) { 1032 PetscInt size,mark = i+1; 1033 1034 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1035 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1036 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1037 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1038 } 1039 if (print) { 1040 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1041 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1042 } 1043 1044 /* Recompute extended cols */ 1045 eerr = PETSC_FALSE; 1046 for (i=0;i<nee;i++) { 1047 PetscInt size; 1048 1049 cum = 0; 1050 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1051 if (!size && nedfieldlocal) continue; 1052 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1053 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 for (j=0;j<size;j++) { 1055 PetscInt k,ee = idxs[j]; 1056 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1057 } 1058 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1060 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1061 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1063 if (cum != size -1) { 1064 if (print) { 1065 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1066 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1067 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1068 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1069 } 1070 eerr = PETSC_TRUE; 1071 } 1072 } 1073 } 1074 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1075 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1076 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1077 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1078 /* an error should not occur at this point */ 1079 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1080 1081 /* Check the number of endpoints */ 1082 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1083 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1084 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size, found = 0, gc[2]; 1087 1088 /* init with defaults */ 1089 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 if (!size && nedfieldlocal) continue; 1092 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1093 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) { 1096 PetscInt k,ee = idxs[j]; 1097 for (k=ii[ee];k<ii[ee+1];k++) { 1098 PetscInt vv = jj[k]; 1099 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1100 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1101 corners[i*2+found++] = vv; 1102 } 1103 } 1104 } 1105 if (found != 2) { 1106 PetscInt e; 1107 if (fl2g) { 1108 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1109 } else { 1110 e = idxs[0]; 1111 } 1112 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1113 } 1114 1115 /* get primal dof index on this coarse edge */ 1116 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1117 if (gc[0] > gc[1]) { 1118 PetscInt swap = corners[2*i]; 1119 corners[2*i] = corners[2*i+1]; 1120 corners[2*i+1] = swap; 1121 } 1122 cedges[i] = idxs[size-1]; 1123 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1124 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1125 } 1126 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1128 1129 #if defined(PETSC_USE_DEBUG) 1130 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1131 not interfere with neighbouring coarse edges */ 1132 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1133 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 for (i=0;i<nv;i++) { 1135 PetscInt emax = 0,eemax = 0; 1136 1137 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1138 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1139 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1140 for (j=1;j<nee+1;j++) { 1141 if (emax < emarks[j]) { 1142 emax = emarks[j]; 1143 eemax = j; 1144 } 1145 } 1146 /* not relevant for edges */ 1147 if (!eemax) continue; 1148 1149 for (j=ii[i];j<ii[i+1];j++) { 1150 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1151 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1152 } 1153 } 1154 } 1155 ierr = PetscFree(emarks);CHKERRQ(ierr); 1156 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1157 #endif 1158 1159 /* Compute extended rows indices for edge blocks of the change of basis */ 1160 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1161 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1162 extmem *= maxsize; 1163 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1165 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1166 for (i=0;i<nv;i++) { 1167 PetscInt mark = 0,size,start; 1168 1169 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1170 for (j=ii[i];j<ii[i+1];j++) 1171 if (marks[jj[j]] && !mark) 1172 mark = marks[jj[j]]; 1173 1174 /* not relevant */ 1175 if (!mark) continue; 1176 1177 /* import extended row */ 1178 mark--; 1179 start = mark*extmem+extrowcum[mark]; 1180 size = ii[i+1]-ii[i]; 1181 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1182 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1183 extrowcum[mark] += size; 1184 } 1185 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1187 ierr = PetscFree(marks);CHKERRQ(ierr); 1188 1189 /* Compress extrows */ 1190 cum = 0; 1191 for (i=0;i<nee;i++) { 1192 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1193 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1195 cum = PetscMax(cum,size); 1196 } 1197 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1198 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1199 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1200 1201 /* Workspace for lapack inner calls and VecSetValues */ 1202 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1203 1204 /* Create change of basis matrix (preallocation can be improved) */ 1205 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1206 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1207 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1208 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1209 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1210 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1211 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1212 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1213 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1214 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1215 1216 /* Defaults to identity */ 1217 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1218 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1219 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1221 1222 /* Create discrete gradient for the coarser level if needed */ 1223 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1224 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1225 if (pcbddc->current_level < pcbddc->max_levels) { 1226 ISLocalToGlobalMapping cel2g,cvl2g; 1227 IS wis,gwis; 1228 PetscInt cnv,cne; 1229 1230 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1231 if (fl2g) { 1232 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1235 pcbddc->nedclocal = wis; 1236 } 1237 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1238 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1239 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1240 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1241 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1242 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1243 1244 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1246 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1247 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1248 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1249 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1250 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1251 1252 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1253 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1254 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1255 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1256 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1257 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1258 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1259 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1260 } 1261 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1262 1263 #if defined(PRINT_GDET) 1264 inc = 0; 1265 lev = pcbddc->current_level; 1266 #endif 1267 1268 /* Insert values in the change of basis matrix */ 1269 for (i=0;i<nee;i++) { 1270 Mat Gins = NULL, GKins = NULL; 1271 IS cornersis = NULL; 1272 PetscScalar cvals[2]; 1273 1274 if (pcbddc->nedcG) { 1275 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1276 } 1277 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1278 if (Gins && GKins) { 1279 PetscScalar *data; 1280 const PetscInt *rows,*cols; 1281 PetscInt nrh,nch,nrc,ncc; 1282 1283 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1284 /* H1 */ 1285 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1286 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1287 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1288 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1289 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1291 /* complement */ 1292 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1293 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1294 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1295 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1296 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1297 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1299 1300 /* coarse discrete gradient */ 1301 if (pcbddc->nedcG) { 1302 PetscInt cols[2]; 1303 1304 cols[0] = 2*i; 1305 cols[1] = 2*i+1; 1306 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1307 } 1308 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1309 } 1310 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1311 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1312 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1313 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1314 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1317 1318 /* Start assembling */ 1319 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1320 if (pcbddc->nedcG) { 1321 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 } 1323 1324 /* Free */ 1325 if (fl2g) { 1326 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1327 for (i=0;i<nee;i++) { 1328 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1329 } 1330 ierr = PetscFree(eedges);CHKERRQ(ierr); 1331 } 1332 1333 /* hack mat_graph with primal dofs on the coarse edges */ 1334 { 1335 PCBDDCGraph graph = pcbddc->mat_graph; 1336 PetscInt *oqueue = graph->queue; 1337 PetscInt *ocptr = graph->cptr; 1338 PetscInt ncc,*idxs; 1339 1340 /* find first primal edge */ 1341 if (pcbddc->nedclocal) { 1342 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1343 } else { 1344 if (fl2g) { 1345 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1346 } 1347 idxs = cedges; 1348 } 1349 cum = 0; 1350 while (cum < nee && cedges[cum] < 0) cum++; 1351 1352 /* adapt connected components */ 1353 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1354 graph->cptr[0] = 0; 1355 for (i=0,ncc=0;i<graph->ncc;i++) { 1356 PetscInt lc = ocptr[i+1]-ocptr[i]; 1357 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1358 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1359 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1360 ncc++; 1361 lc--; 1362 cum++; 1363 while (cum < nee && cedges[cum] < 0) cum++; 1364 } 1365 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1366 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1367 ncc++; 1368 } 1369 graph->ncc = ncc; 1370 if (pcbddc->nedclocal) { 1371 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1372 } 1373 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1376 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1377 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1378 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1379 1380 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1381 ierr = PetscFree(extrow);CHKERRQ(ierr); 1382 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1383 ierr = PetscFree(corners);CHKERRQ(ierr); 1384 ierr = PetscFree(cedges);CHKERRQ(ierr); 1385 ierr = PetscFree(extrows);CHKERRQ(ierr); 1386 ierr = PetscFree(extcols);CHKERRQ(ierr); 1387 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1388 1389 /* Complete assembling */ 1390 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (pcbddc->nedcG) { 1392 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 #if 0 1394 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1395 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1396 #endif 1397 } 1398 1399 /* set change of basis */ 1400 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1401 ierr = MatDestroy(&T);CHKERRQ(ierr); 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* the near-null space of BDDC carries information on quadrature weights, 1407 and these can be collinear -> so cheat with MatNullSpaceCreate 1408 and create a suitable set of basis vectors first */ 1409 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1410 { 1411 PetscErrorCode ierr; 1412 PetscInt i; 1413 1414 PetscFunctionBegin; 1415 for (i=0;i<nvecs;i++) { 1416 PetscInt first,last; 1417 1418 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1419 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1420 if (i>=first && i < last) { 1421 PetscScalar *data; 1422 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1423 if (!has_const) { 1424 data[i-first] = 1.; 1425 } else { 1426 data[2*i-first] = 1./PetscSqrtReal(2.); 1427 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1428 } 1429 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1430 } 1431 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1432 } 1433 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1434 for (i=0;i<nvecs;i++) { /* reset vectors */ 1435 PetscInt first,last; 1436 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1437 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1438 if (i>=first && i < last) { 1439 PetscScalar *data; 1440 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1441 if (!has_const) { 1442 data[i-first] = 0.; 1443 } else { 1444 data[2*i-first] = 0.; 1445 data[2*i-first+1] = 0.; 1446 } 1447 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1448 } 1449 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1450 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1456 { 1457 Mat loc_divudotp; 1458 Vec p,v,vins,quad_vec,*quad_vecs; 1459 ISLocalToGlobalMapping map; 1460 PetscScalar *vals; 1461 const PetscScalar *array; 1462 PetscInt i,maxneighs,maxsize; 1463 PetscInt n_neigh,*neigh,*n_shared,**shared; 1464 PetscMPIInt rank; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1469 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1470 if (!maxneighs) { 1471 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1472 *nnsp = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 maxsize = 0; 1476 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1477 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1478 /* create vectors to hold quadrature weights */ 1479 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1480 if (!transpose) { 1481 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1484 } 1485 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1486 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1487 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<maxneighs;i++) { 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1491 } 1492 1493 /* compute local quad vec */ 1494 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1495 if (!transpose) { 1496 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1497 } else { 1498 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1499 } 1500 ierr = VecSet(p,1.);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1511 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1512 ierr = VecScatterCreateWithData(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1513 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1516 } else { 1517 vins = v; 1518 } 1519 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1520 ierr = VecDestroy(&p);CHKERRQ(ierr); 1521 1522 /* insert in global quadrature vecs */ 1523 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1524 for (i=0;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1532 idx = -(idx+1); 1533 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1534 } 1535 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1537 if (vl2l) { 1538 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1539 } 1540 ierr = VecDestroy(&v);CHKERRQ(ierr); 1541 ierr = PetscFree(vals);CHKERRQ(ierr); 1542 1543 /* assemble near null space */ 1544 for (i=0;i<maxneighs;i++) { 1545 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1546 } 1547 for (i=0;i<maxneighs;i++) { 1548 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1549 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1550 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1551 } 1552 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (primalv) { 1563 if (pcbddc->user_primal_vertices_local) { 1564 IS list[2], newp; 1565 1566 list[0] = primalv; 1567 list[1] = pcbddc->user_primal_vertices_local; 1568 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1569 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1570 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1571 pcbddc->user_primal_vertices_local = newp; 1572 } else { 1573 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1574 } 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1580 { 1581 PetscInt f, *comp = (PetscInt *)ctx; 1582 1583 PetscFunctionBegin; 1584 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1589 { 1590 PetscErrorCode ierr; 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1598 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1599 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1602 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1603 if (monolithic) { /* just get block size to properly compute vertices */ 1604 if (pcbddc->vertex_size == 1) { 1605 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1606 } 1607 goto boundary; 1608 } 1609 1610 if (pcbddc->user_provided_isfordofs) { 1611 if (pcbddc->n_ISForDofs) { 1612 PetscInt i; 1613 1614 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1615 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1616 PetscInt bs; 1617 1618 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1619 ierr = ISGetBlockSize(pcbddc->ISForDofs[i],&bs);CHKERRQ(ierr); 1620 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1621 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1622 } 1623 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1624 pcbddc->n_ISForDofs = 0; 1625 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1626 } 1627 } else { 1628 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1629 DM dm; 1630 1631 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1632 if (!dm) { 1633 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1634 } 1635 if (dm) { 1636 IS *fields; 1637 PetscInt nf,i; 1638 1639 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1640 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1641 for (i=0;i<nf;i++) { 1642 PetscInt bs; 1643 1644 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1645 ierr = ISGetBlockSize(fields[i],&bs);CHKERRQ(ierr); 1646 ierr = ISSetBlockSize(pcbddc->ISForDofsLocal[i],bs);CHKERRQ(ierr); 1647 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1648 } 1649 ierr = PetscFree(fields);CHKERRQ(ierr); 1650 pcbddc->n_ISForDofsLocal = nf; 1651 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1652 PetscContainer c; 1653 1654 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1655 if (c) { 1656 MatISLocalFields lf; 1657 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1658 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1659 } else { /* fallback, create the default fields if bs > 1 */ 1660 PetscInt i, n = matis->A->rmap->n; 1661 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1662 if (i > 1) { 1663 pcbddc->n_ISForDofsLocal = i; 1664 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1665 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1666 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1667 } 1668 } 1669 } 1670 } 1671 } else { 1672 PetscInt i; 1673 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1674 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1675 } 1676 } 1677 } 1678 1679 boundary: 1680 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1681 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1682 } else if (pcbddc->DirichletBoundariesLocal) { 1683 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1684 } 1685 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1687 } else if (pcbddc->NeumannBoundariesLocal) { 1688 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1689 } 1690 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1691 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1692 } 1693 ierr = VecDestroy(&global);CHKERRQ(ierr); 1694 ierr = VecDestroy(&local);CHKERRQ(ierr); 1695 /* detect local disconnected subdomains if requested (use matis->A) */ 1696 if (pcbddc->detect_disconnected) { 1697 IS primalv = NULL; 1698 PetscInt i; 1699 PetscBool filter = pcbddc->detect_disconnected_filter; 1700 1701 for (i=0;i<pcbddc->n_local_subs;i++) { 1702 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1703 } 1704 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1705 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1706 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1707 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1708 } 1709 /* early stage corner detection */ 1710 { 1711 DM dm; 1712 1713 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1714 if (dm) { 1715 PetscBool isda; 1716 1717 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1718 if (isda) { 1719 ISLocalToGlobalMapping l2l; 1720 IS corners; 1721 Mat lA; 1722 1723 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1724 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1725 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1726 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1727 if (l2l && corners) { 1728 const PetscInt *idx; 1729 PetscInt dof,bs,*idxout,n; 1730 1731 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1732 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1733 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1734 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1735 if (bs == dof) { 1736 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1737 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1738 } else { /* the original DMDA local-to-local map have been modified */ 1739 PetscInt i,d; 1740 1741 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1742 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1743 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1744 1745 bs = 1; 1746 n *= dof; 1747 } 1748 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1749 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1750 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1751 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1752 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1753 pcbddc->corner_selected = PETSC_TRUE; 1754 } else if (corners) { /* not from DMDA */ 1755 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1756 } 1757 } 1758 } 1759 } 1760 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1761 DM dm; 1762 1763 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1764 if (!dm) { 1765 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1766 } 1767 if (dm) { 1768 Vec vcoords; 1769 PetscSection section; 1770 PetscReal *coords; 1771 PetscInt d,cdim,nl,nf,**ctxs; 1772 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1773 1774 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1775 ierr = DMGetSection(dm,§ion);CHKERRQ(ierr); 1776 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1777 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1778 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1779 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1780 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1781 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1782 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1783 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1784 for (d=0;d<cdim;d++) { 1785 PetscInt i; 1786 const PetscScalar *v; 1787 1788 for (i=0;i<nf;i++) ctxs[i][0] = d; 1789 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1790 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1791 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1792 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1793 } 1794 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1795 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1796 ierr = PetscFree(coords);CHKERRQ(ierr); 1797 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1798 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1799 } 1800 } 1801 PetscFunctionReturn(0); 1802 } 1803 1804 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1805 { 1806 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1807 PetscErrorCode ierr; 1808 IS nis; 1809 const PetscInt *idxs; 1810 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1811 PetscBool *ld; 1812 1813 PetscFunctionBegin; 1814 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1815 if (mop == MPI_LAND) { 1816 /* init rootdata with true */ 1817 ld = (PetscBool*) matis->sf_rootdata; 1818 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1819 } else { 1820 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1821 } 1822 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1823 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1824 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1825 ld = (PetscBool*) matis->sf_leafdata; 1826 for (i=0;i<nd;i++) 1827 if (-1 < idxs[i] && idxs[i] < n) 1828 ld[idxs[i]] = PETSC_TRUE; 1829 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1830 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1831 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1832 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1833 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1834 if (mop == MPI_LAND) { 1835 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1836 } else { 1837 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1838 } 1839 for (i=0,nnd=0;i<n;i++) 1840 if (ld[i]) 1841 nidxs[nnd++] = i; 1842 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1843 ierr = ISDestroy(is);CHKERRQ(ierr); 1844 *is = nis; 1845 PetscFunctionReturn(0); 1846 } 1847 1848 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1849 { 1850 PC_IS *pcis = (PC_IS*)(pc->data); 1851 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1852 PetscErrorCode ierr; 1853 1854 PetscFunctionBegin; 1855 if (!pcbddc->benign_have_null) { 1856 PetscFunctionReturn(0); 1857 } 1858 if (pcbddc->ChangeOfBasisMatrix) { 1859 Vec swap; 1860 1861 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1862 swap = pcbddc->work_change; 1863 pcbddc->work_change = r; 1864 r = swap; 1865 } 1866 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1867 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1868 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1869 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 1870 ierr = VecSet(z,0.);CHKERRQ(ierr); 1871 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1872 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1873 if (pcbddc->ChangeOfBasisMatrix) { 1874 pcbddc->work_change = r; 1875 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1876 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1877 } 1878 PetscFunctionReturn(0); 1879 } 1880 1881 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1882 { 1883 PCBDDCBenignMatMult_ctx ctx; 1884 PetscErrorCode ierr; 1885 PetscBool apply_right,apply_left,reset_x; 1886 1887 PetscFunctionBegin; 1888 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1889 if (transpose) { 1890 apply_right = ctx->apply_left; 1891 apply_left = ctx->apply_right; 1892 } else { 1893 apply_right = ctx->apply_right; 1894 apply_left = ctx->apply_left; 1895 } 1896 reset_x = PETSC_FALSE; 1897 if (apply_right) { 1898 const PetscScalar *ax; 1899 PetscInt nl,i; 1900 1901 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1902 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1903 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1904 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1905 for (i=0;i<ctx->benign_n;i++) { 1906 PetscScalar sum,val; 1907 const PetscInt *idxs; 1908 PetscInt nz,j; 1909 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1910 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1911 sum = 0.; 1912 if (ctx->apply_p0) { 1913 val = ctx->work[idxs[nz-1]]; 1914 for (j=0;j<nz-1;j++) { 1915 sum += ctx->work[idxs[j]]; 1916 ctx->work[idxs[j]] += val; 1917 } 1918 } else { 1919 for (j=0;j<nz-1;j++) { 1920 sum += ctx->work[idxs[j]]; 1921 } 1922 } 1923 ctx->work[idxs[nz-1]] -= sum; 1924 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1925 } 1926 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1927 reset_x = PETSC_TRUE; 1928 } 1929 if (transpose) { 1930 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1931 } else { 1932 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1933 } 1934 if (reset_x) { 1935 ierr = VecResetArray(x);CHKERRQ(ierr); 1936 } 1937 if (apply_left) { 1938 PetscScalar *ay; 1939 PetscInt i; 1940 1941 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1942 for (i=0;i<ctx->benign_n;i++) { 1943 PetscScalar sum,val; 1944 const PetscInt *idxs; 1945 PetscInt nz,j; 1946 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1947 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1948 val = -ay[idxs[nz-1]]; 1949 if (ctx->apply_p0) { 1950 sum = 0.; 1951 for (j=0;j<nz-1;j++) { 1952 sum += ay[idxs[j]]; 1953 ay[idxs[j]] += val; 1954 } 1955 ay[idxs[nz-1]] += sum; 1956 } else { 1957 for (j=0;j<nz-1;j++) { 1958 ay[idxs[j]] += val; 1959 } 1960 ay[idxs[nz-1]] = 0.; 1961 } 1962 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1963 } 1964 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1965 } 1966 PetscFunctionReturn(0); 1967 } 1968 1969 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1970 { 1971 PetscErrorCode ierr; 1972 1973 PetscFunctionBegin; 1974 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1975 PetscFunctionReturn(0); 1976 } 1977 1978 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1979 { 1980 PetscErrorCode ierr; 1981 1982 PetscFunctionBegin; 1983 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1984 PetscFunctionReturn(0); 1985 } 1986 1987 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1988 { 1989 PC_IS *pcis = (PC_IS*)pc->data; 1990 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1991 PCBDDCBenignMatMult_ctx ctx; 1992 PetscErrorCode ierr; 1993 1994 PetscFunctionBegin; 1995 if (!restore) { 1996 Mat A_IB,A_BI; 1997 PetscScalar *work; 1998 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1999 2000 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2001 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2002 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2003 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2004 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2005 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2006 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2007 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2008 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2009 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2010 ctx->apply_left = PETSC_TRUE; 2011 ctx->apply_right = PETSC_FALSE; 2012 ctx->apply_p0 = PETSC_FALSE; 2013 ctx->benign_n = pcbddc->benign_n; 2014 if (reuse) { 2015 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2016 ctx->free = PETSC_FALSE; 2017 } else { /* TODO: could be optimized for successive solves */ 2018 ISLocalToGlobalMapping N_to_D; 2019 PetscInt i; 2020 2021 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2022 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2023 for (i=0;i<pcbddc->benign_n;i++) { 2024 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2025 } 2026 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2027 ctx->free = PETSC_TRUE; 2028 } 2029 ctx->A = pcis->A_IB; 2030 ctx->work = work; 2031 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2032 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2033 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2034 pcis->A_IB = A_IB; 2035 2036 /* A_BI as A_IB^T */ 2037 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2038 pcbddc->benign_original_mat = pcis->A_BI; 2039 pcis->A_BI = A_BI; 2040 } else { 2041 if (!pcbddc->benign_original_mat) { 2042 PetscFunctionReturn(0); 2043 } 2044 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2045 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2046 pcis->A_IB = ctx->A; 2047 ctx->A = NULL; 2048 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2049 pcis->A_BI = pcbddc->benign_original_mat; 2050 pcbddc->benign_original_mat = NULL; 2051 if (ctx->free) { 2052 PetscInt i; 2053 for (i=0;i<ctx->benign_n;i++) { 2054 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2055 } 2056 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 } 2058 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2059 ierr = PetscFree(ctx);CHKERRQ(ierr); 2060 } 2061 PetscFunctionReturn(0); 2062 } 2063 2064 /* used just in bddc debug mode */ 2065 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2066 { 2067 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2068 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2069 Mat An; 2070 PetscErrorCode ierr; 2071 2072 PetscFunctionBegin; 2073 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2074 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2075 if (is1) { 2076 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2077 ierr = MatDestroy(&An);CHKERRQ(ierr); 2078 } else { 2079 *B = An; 2080 } 2081 PetscFunctionReturn(0); 2082 } 2083 2084 /* TODO: add reuse flag */ 2085 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2086 { 2087 Mat Bt; 2088 PetscScalar *a,*bdata; 2089 const PetscInt *ii,*ij; 2090 PetscInt m,n,i,nnz,*bii,*bij; 2091 PetscBool flg_row; 2092 PetscErrorCode ierr; 2093 2094 PetscFunctionBegin; 2095 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2096 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2097 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2098 nnz = n; 2099 for (i=0;i<ii[n];i++) { 2100 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2101 } 2102 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2103 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2104 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2105 nnz = 0; 2106 bii[0] = 0; 2107 for (i=0;i<n;i++) { 2108 PetscInt j; 2109 for (j=ii[i];j<ii[i+1];j++) { 2110 PetscScalar entry = a[j]; 2111 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2112 bij[nnz] = ij[j]; 2113 bdata[nnz] = entry; 2114 nnz++; 2115 } 2116 } 2117 bii[i+1] = nnz; 2118 } 2119 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2120 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2121 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2122 { 2123 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2124 b->free_a = PETSC_TRUE; 2125 b->free_ij = PETSC_TRUE; 2126 } 2127 if (*B == A) { 2128 ierr = MatDestroy(&A);CHKERRQ(ierr); 2129 } 2130 *B = Bt; 2131 PetscFunctionReturn(0); 2132 } 2133 2134 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2135 { 2136 Mat B = NULL; 2137 DM dm; 2138 IS is_dummy,*cc_n; 2139 ISLocalToGlobalMapping l2gmap_dummy; 2140 PCBDDCGraph graph; 2141 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2142 PetscInt i,n; 2143 PetscInt *xadj,*adjncy; 2144 PetscBool isplex = PETSC_FALSE; 2145 PetscErrorCode ierr; 2146 2147 PetscFunctionBegin; 2148 if (ncc) *ncc = 0; 2149 if (cc) *cc = NULL; 2150 if (primalv) *primalv = NULL; 2151 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2152 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2153 if (!dm) { 2154 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2155 } 2156 if (dm) { 2157 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2158 } 2159 if (filter) isplex = PETSC_FALSE; 2160 2161 if (isplex) { /* this code has been modified from plexpartition.c */ 2162 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2163 PetscInt *adj = NULL; 2164 IS cellNumbering; 2165 const PetscInt *cellNum; 2166 PetscBool useCone, useClosure; 2167 PetscSection section; 2168 PetscSegBuffer adjBuffer; 2169 PetscSF sfPoint; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2174 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2175 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2176 /* Build adjacency graph via a section/segbuffer */ 2177 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2178 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2179 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2180 /* Always use FVM adjacency to create partitioner graph */ 2181 ierr = DMGetBasicAdjacency(dm, &useCone, &useClosure);CHKERRQ(ierr); 2182 ierr = DMSetBasicAdjacency(dm, PETSC_TRUE, PETSC_FALSE);CHKERRQ(ierr); 2183 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2184 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2185 for (n = 0, p = pStart; p < pEnd; p++) { 2186 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2187 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2188 adjSize = PETSC_DETERMINE; 2189 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2190 for (a = 0; a < adjSize; ++a) { 2191 const PetscInt point = adj[a]; 2192 if (pStart <= point && point < pEnd) { 2193 PetscInt *PETSC_RESTRICT pBuf; 2194 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2195 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2196 *pBuf = point; 2197 } 2198 } 2199 n++; 2200 } 2201 ierr = DMSetBasicAdjacency(dm, useCone, useClosure);CHKERRQ(ierr); 2202 /* Derive CSR graph from section/segbuffer */ 2203 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2204 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2205 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2206 for (idx = 0, p = pStart; p < pEnd; p++) { 2207 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2208 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2209 } 2210 xadj[n] = size; 2211 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2212 /* Clean up */ 2213 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2214 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2215 ierr = PetscFree(adj);CHKERRQ(ierr); 2216 graph->xadj = xadj; 2217 graph->adjncy = adjncy; 2218 } else { 2219 Mat A; 2220 PetscBool isseqaij, flg_row; 2221 2222 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2223 if (!A->rmap->N || !A->cmap->N) { 2224 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2225 PetscFunctionReturn(0); 2226 } 2227 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2228 if (!isseqaij && filter) { 2229 PetscBool isseqdense; 2230 2231 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2232 if (!isseqdense) { 2233 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2234 } else { /* TODO: rectangular case and LDA */ 2235 PetscScalar *array; 2236 PetscReal chop=1.e-6; 2237 2238 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2239 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2240 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2241 for (i=0;i<n;i++) { 2242 PetscInt j; 2243 for (j=i+1;j<n;j++) { 2244 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2245 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2246 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2247 } 2248 } 2249 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2250 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2251 } 2252 } else { 2253 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2254 B = A; 2255 } 2256 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2257 2258 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2259 if (filter) { 2260 PetscScalar *data; 2261 PetscInt j,cum; 2262 2263 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2264 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2265 cum = 0; 2266 for (i=0;i<n;i++) { 2267 PetscInt t; 2268 2269 for (j=xadj[i];j<xadj[i+1];j++) { 2270 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2271 continue; 2272 } 2273 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2274 } 2275 t = xadj_filtered[i]; 2276 xadj_filtered[i] = cum; 2277 cum += t; 2278 } 2279 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2280 graph->xadj = xadj_filtered; 2281 graph->adjncy = adjncy_filtered; 2282 } else { 2283 graph->xadj = xadj; 2284 graph->adjncy = adjncy; 2285 } 2286 } 2287 /* compute local connected components using PCBDDCGraph */ 2288 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2289 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2290 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2291 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2292 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2293 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2294 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2295 2296 /* partial clean up */ 2297 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2298 if (B) { 2299 PetscBool flg_row; 2300 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2301 ierr = MatDestroy(&B);CHKERRQ(ierr); 2302 } 2303 if (isplex) { 2304 ierr = PetscFree(xadj);CHKERRQ(ierr); 2305 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2306 } 2307 2308 /* get back data */ 2309 if (isplex) { 2310 if (ncc) *ncc = graph->ncc; 2311 if (cc || primalv) { 2312 Mat A; 2313 PetscBT btv,btvt; 2314 PetscSection subSection; 2315 PetscInt *ids,cum,cump,*cids,*pids; 2316 2317 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2318 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2319 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2320 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2321 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2322 2323 cids[0] = 0; 2324 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2325 PetscInt j; 2326 2327 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2328 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2329 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2330 2331 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2332 for (k = 0; k < 2*size; k += 2) { 2333 PetscInt s, p = closure[k], off, dof, cdof; 2334 2335 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2336 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2337 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2338 for (s = 0; s < dof-cdof; s++) { 2339 if (PetscBTLookupSet(btvt,off+s)) continue; 2340 if (!PetscBTLookup(btv,off+s)) { 2341 ids[cum++] = off+s; 2342 } else { /* cross-vertex */ 2343 pids[cump++] = off+s; 2344 } 2345 } 2346 } 2347 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2348 } 2349 cids[i+1] = cum; 2350 /* mark dofs as already assigned */ 2351 for (j = cids[i]; j < cids[i+1]; j++) { 2352 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2353 } 2354 } 2355 if (cc) { 2356 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2357 for (i = 0; i < graph->ncc; i++) { 2358 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2359 } 2360 *cc = cc_n; 2361 } 2362 if (primalv) { 2363 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2364 } 2365 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2366 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2367 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2368 } 2369 } else { 2370 if (ncc) *ncc = graph->ncc; 2371 if (cc) { 2372 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2373 for (i=0;i<graph->ncc;i++) { 2374 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2375 } 2376 *cc = cc_n; 2377 } 2378 } 2379 /* clean up graph */ 2380 graph->xadj = 0; 2381 graph->adjncy = 0; 2382 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2383 PetscFunctionReturn(0); 2384 } 2385 2386 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2387 { 2388 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2389 PC_IS* pcis = (PC_IS*)(pc->data); 2390 IS dirIS = NULL; 2391 PetscInt i; 2392 PetscErrorCode ierr; 2393 2394 PetscFunctionBegin; 2395 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2396 if (zerodiag) { 2397 Mat A; 2398 Vec vec3_N; 2399 PetscScalar *vals; 2400 const PetscInt *idxs; 2401 PetscInt nz,*count; 2402 2403 /* p0 */ 2404 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2405 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2406 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2407 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2408 for (i=0;i<nz;i++) vals[i] = 1.; 2409 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2410 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2411 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2412 /* v_I */ 2413 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2414 for (i=0;i<nz;i++) vals[i] = 0.; 2415 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2416 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2417 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2418 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2419 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2421 if (dirIS) { 2422 PetscInt n; 2423 2424 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2425 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2426 for (i=0;i<n;i++) vals[i] = 0.; 2427 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2428 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2429 } 2430 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2431 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2432 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2433 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2434 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2435 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2436 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2437 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2438 ierr = PetscFree(vals);CHKERRQ(ierr); 2439 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2440 2441 /* there should not be any pressure dofs lying on the interface */ 2442 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2443 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2444 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2445 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2446 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2447 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2448 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2449 ierr = PetscFree(count);CHKERRQ(ierr); 2450 } 2451 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2452 2453 /* check PCBDDCBenignGetOrSetP0 */ 2454 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2455 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2456 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2457 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2458 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) { 2460 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2461 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2462 } 2463 PetscFunctionReturn(0); 2464 } 2465 2466 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, PetscBool reuse, IS *zerodiaglocal) 2467 { 2468 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2469 IS pressures = NULL,zerodiag = NULL,*bzerodiag = NULL,zerodiag_save,*zerodiag_subs; 2470 PetscInt nz,n,benign_n,bsp = 1; 2471 PetscInt *interior_dofs,n_interior_dofs,nneu; 2472 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2473 PetscErrorCode ierr; 2474 2475 PetscFunctionBegin; 2476 if (reuse) goto project_b0; 2477 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2478 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2479 for (n=0;n<pcbddc->benign_n;n++) { 2480 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2481 } 2482 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2483 has_null_pressures = PETSC_TRUE; 2484 have_null = PETSC_TRUE; 2485 /* if a local information on dofs is present, gets pressure dofs from command line (uses the last field is not provided) 2486 Without local information, it uses only the zerodiagonal dofs (ok if the pressure block is all zero and it is a scalar field) 2487 Checks if all the pressure dofs in each subdomain have a zero diagonal 2488 If not, a change of basis on pressures is not needed 2489 since the local Schur complements are already SPD 2490 */ 2491 if (pcbddc->n_ISForDofsLocal) { 2492 IS iP = NULL; 2493 PetscInt p,*pp; 2494 PetscBool flg; 2495 2496 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pp);CHKERRQ(ierr); 2497 n = pcbddc->n_ISForDofsLocal; 2498 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2499 ierr = PetscOptionsIntArray("-pc_bddc_pressure_field","Field id for pressures",NULL,pp,&n,&flg);CHKERRQ(ierr); 2500 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2501 if (!flg) { 2502 n = 1; 2503 pp[0] = pcbddc->n_ISForDofsLocal-1; 2504 } 2505 2506 bsp = 0; 2507 for (p=0;p<n;p++) { 2508 PetscInt bs; 2509 2510 if (pp[p] < 0 || pp[p] > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",pp[p]); 2511 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2512 bsp += bs; 2513 } 2514 ierr = PetscMalloc1(bsp,&bzerodiag);CHKERRQ(ierr); 2515 bsp = 0; 2516 for (p=0;p<n;p++) { 2517 const PetscInt *idxs; 2518 PetscInt b,bs,npl,*bidxs; 2519 2520 ierr = ISGetBlockSize(pcbddc->ISForDofsLocal[pp[p]],&bs);CHKERRQ(ierr); 2521 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[pp[p]],&npl);CHKERRQ(ierr); 2522 ierr = ISGetIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2523 ierr = PetscMalloc1(npl/bs,&bidxs);CHKERRQ(ierr); 2524 for (b=0;b<bs;b++) { 2525 PetscInt i; 2526 2527 for (i=0;i<npl/bs;i++) bidxs[i] = idxs[bs*i+b]; 2528 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl/bs,bidxs,PETSC_COPY_VALUES,&bzerodiag[bsp]);CHKERRQ(ierr); 2529 bsp++; 2530 } 2531 ierr = PetscFree(bidxs);CHKERRQ(ierr); 2532 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[pp[p]],&idxs);CHKERRQ(ierr); 2533 } 2534 ierr = ISConcatenate(PETSC_COMM_SELF,bsp,bzerodiag,&pressures);CHKERRQ(ierr); 2535 2536 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2537 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2538 if (iP) { 2539 IS newpressures; 2540 2541 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2542 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2543 pressures = newpressures; 2544 } 2545 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2546 if (!sorted) { 2547 ierr = ISSort(pressures);CHKERRQ(ierr); 2548 } 2549 ierr = PetscFree(pp);CHKERRQ(ierr); 2550 } 2551 2552 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2553 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2554 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2555 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2556 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2557 if (!sorted) { 2558 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2559 } 2560 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2561 zerodiag_save = zerodiag; 2562 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2563 if (!nz) { 2564 if (n) have_null = PETSC_FALSE; 2565 has_null_pressures = PETSC_FALSE; 2566 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2567 } 2568 recompute_zerodiag = PETSC_FALSE; 2569 2570 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2571 zerodiag_subs = NULL; 2572 benign_n = 0; 2573 n_interior_dofs = 0; 2574 interior_dofs = NULL; 2575 nneu = 0; 2576 if (pcbddc->NeumannBoundariesLocal) { 2577 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2578 } 2579 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2580 if (checkb) { /* need to compute interior nodes */ 2581 PetscInt n,i,j; 2582 PetscInt n_neigh,*neigh,*n_shared,**shared; 2583 PetscInt *iwork; 2584 2585 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2586 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2587 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2588 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2589 for (i=1;i<n_neigh;i++) 2590 for (j=0;j<n_shared[i];j++) 2591 iwork[shared[i][j]] += 1; 2592 for (i=0;i<n;i++) 2593 if (!iwork[i]) 2594 interior_dofs[n_interior_dofs++] = i; 2595 ierr = PetscFree(iwork);CHKERRQ(ierr); 2596 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2597 } 2598 if (has_null_pressures) { 2599 IS *subs; 2600 PetscInt nsubs,i,j,nl; 2601 const PetscInt *idxs; 2602 PetscScalar *array; 2603 Vec *work; 2604 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2605 2606 subs = pcbddc->local_subs; 2607 nsubs = pcbddc->n_local_subs; 2608 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2609 if (checkb) { 2610 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2612 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2613 /* work[0] = 1_p */ 2614 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2615 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2616 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2617 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2618 /* work[0] = 1_v */ 2619 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2620 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2621 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2622 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2623 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2624 } 2625 2626 if (nsubs > 1 || bsp > 1) { 2627 IS *is; 2628 PetscInt b,totb; 2629 2630 totb = bsp; 2631 is = bsp > 1 ? bzerodiag : &zerodiag; 2632 nsubs = PetscMax(nsubs,1); 2633 ierr = PetscCalloc1(nsubs*totb,&zerodiag_subs);CHKERRQ(ierr); 2634 for (b=0;b<totb;b++) { 2635 for (i=0;i<nsubs;i++) { 2636 ISLocalToGlobalMapping l2g; 2637 IS t_zerodiag_subs; 2638 PetscInt nl; 2639 2640 if (subs) { 2641 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2642 } else { 2643 IS tis; 2644 2645 ierr = MatGetLocalSize(pcbddc->local_mat,&nl,NULL);CHKERRQ(ierr); 2646 ierr = ISCreateStride(PETSC_COMM_SELF,nl,0,1,&tis);CHKERRQ(ierr); 2647 ierr = ISLocalToGlobalMappingCreateIS(tis,&l2g);CHKERRQ(ierr); 2648 ierr = ISDestroy(&tis);CHKERRQ(ierr); 2649 } 2650 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,is[b],&t_zerodiag_subs);CHKERRQ(ierr); 2651 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2652 if (nl) { 2653 PetscBool valid = PETSC_TRUE; 2654 2655 if (checkb) { 2656 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2657 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2658 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2659 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2660 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2661 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2662 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2663 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2664 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2665 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2666 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2667 for (j=0;j<n_interior_dofs;j++) { 2668 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2669 valid = PETSC_FALSE; 2670 break; 2671 } 2672 } 2673 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2674 } 2675 if (valid && nneu) { 2676 const PetscInt *idxs; 2677 PetscInt nzb; 2678 2679 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2680 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2681 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2682 if (nzb) valid = PETSC_FALSE; 2683 } 2684 if (valid && pressures) { 2685 IS t_pressure_subs,tmp; 2686 PetscInt i1,i2; 2687 2688 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2689 ierr = ISEmbed(t_zerodiag_subs,t_pressure_subs,PETSC_TRUE,&tmp);CHKERRQ(ierr); 2690 ierr = ISGetLocalSize(tmp,&i1);CHKERRQ(ierr); 2691 ierr = ISGetLocalSize(t_zerodiag_subs,&i2);CHKERRQ(ierr); 2692 if (i2 != i1) valid = PETSC_FALSE; 2693 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2694 ierr = ISDestroy(&tmp);CHKERRQ(ierr); 2695 } 2696 if (valid) { 2697 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[benign_n]);CHKERRQ(ierr); 2698 benign_n++; 2699 } else recompute_zerodiag = PETSC_TRUE; 2700 } 2701 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2702 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2703 } 2704 } 2705 } else { /* there's just one subdomain (or zero if they have not been detected */ 2706 PetscBool valid = PETSC_TRUE; 2707 2708 if (nneu) valid = PETSC_FALSE; 2709 if (valid && pressures) { 2710 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2711 } 2712 if (valid && checkb) { 2713 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2714 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2715 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2716 for (j=0;j<n_interior_dofs;j++) { 2717 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2718 valid = PETSC_FALSE; 2719 break; 2720 } 2721 } 2722 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2723 } 2724 if (valid) { 2725 benign_n = 1; 2726 ierr = PetscMalloc1(benign_n,&zerodiag_subs);CHKERRQ(ierr); 2727 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2728 zerodiag_subs[0] = zerodiag; 2729 } 2730 } 2731 if (checkb) { 2732 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2733 } 2734 } 2735 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2736 2737 if (!benign_n) { 2738 PetscInt n; 2739 2740 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2741 recompute_zerodiag = PETSC_FALSE; 2742 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2743 if (n) have_null = PETSC_FALSE; 2744 } 2745 2746 /* final check for null pressures */ 2747 if (zerodiag && pressures) { 2748 ierr = ISEqual(pressures,zerodiag,&have_null);CHKERRQ(ierr); 2749 } 2750 2751 if (recompute_zerodiag) { 2752 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2753 if (benign_n == 1) { 2754 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2755 zerodiag = zerodiag_subs[0]; 2756 } else { 2757 PetscInt i,nzn,*new_idxs; 2758 2759 nzn = 0; 2760 for (i=0;i<benign_n;i++) { 2761 PetscInt ns; 2762 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2763 nzn += ns; 2764 } 2765 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2766 nzn = 0; 2767 for (i=0;i<benign_n;i++) { 2768 PetscInt ns,*idxs; 2769 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2770 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2771 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2772 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2773 nzn += ns; 2774 } 2775 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2776 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2777 } 2778 have_null = PETSC_FALSE; 2779 } 2780 2781 /* determines if the coarse solver will be singular or not */ 2782 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2783 2784 /* Prepare matrix to compute no-net-flux */ 2785 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2786 Mat A,loc_divudotp; 2787 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2788 IS row,col,isused = NULL; 2789 PetscInt M,N,n,st,n_isused; 2790 2791 if (pressures) { 2792 isused = pressures; 2793 } else { 2794 isused = zerodiag_save; 2795 } 2796 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2797 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2798 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2799 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2800 n_isused = 0; 2801 if (isused) { 2802 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2803 } 2804 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2805 st = st-n_isused; 2806 if (n) { 2807 const PetscInt *gidxs; 2808 2809 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2810 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2811 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2812 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2813 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2815 } else { 2816 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2817 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2818 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2819 } 2820 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2821 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2822 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2823 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2824 ierr = ISDestroy(&row);CHKERRQ(ierr); 2825 ierr = ISDestroy(&col);CHKERRQ(ierr); 2826 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2827 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2828 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2829 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2830 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2831 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2832 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2833 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2834 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2835 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2836 } 2837 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2838 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2839 if (bzerodiag) { 2840 PetscInt i; 2841 2842 for (i=0;i<bsp;i++) { 2843 ierr = ISDestroy(&bzerodiag[i]);CHKERRQ(ierr); 2844 } 2845 ierr = PetscFree(bzerodiag);CHKERRQ(ierr); 2846 } 2847 pcbddc->benign_n = benign_n; 2848 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2849 2850 /* determines if the problem has subdomains with 0 pressure block */ 2851 have_null = (PetscBool)(!!pcbddc->benign_n); 2852 ierr = MPIU_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2853 2854 project_b0: 2855 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2856 /* change of basis and p0 dofs */ 2857 if (pcbddc->benign_n) { 2858 PetscInt i,s,*nnz; 2859 2860 /* local change of basis for pressures */ 2861 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2862 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2863 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2864 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2865 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2866 for (i=0;i<n;i++) nnz[i] = 1; /* defaults to identity */ 2867 for (i=0;i<pcbddc->benign_n;i++) { 2868 const PetscInt *idxs; 2869 PetscInt nzs,j; 2870 2871 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2872 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2873 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2874 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2875 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2876 } 2877 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2878 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2879 ierr = PetscFree(nnz);CHKERRQ(ierr); 2880 /* set identity by default */ 2881 for (i=0;i<n;i++) { 2882 ierr = MatSetValue(pcbddc->benign_change,i,i,1.,INSERT_VALUES);CHKERRQ(ierr); 2883 } 2884 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2885 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2886 /* set change on pressures */ 2887 for (s=0;s<pcbddc->benign_n;s++) { 2888 PetscScalar *array; 2889 const PetscInt *idxs; 2890 PetscInt nzs; 2891 2892 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2893 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2894 for (i=0;i<nzs-1;i++) { 2895 PetscScalar vals[2]; 2896 PetscInt cols[2]; 2897 2898 cols[0] = idxs[i]; 2899 cols[1] = idxs[nzs-1]; 2900 vals[0] = 1.; 2901 vals[1] = 1.; 2902 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2903 } 2904 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2905 for (i=0;i<nzs-1;i++) array[i] = -1.; 2906 array[nzs-1] = 1.; 2907 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2908 /* store local idxs for p0 */ 2909 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2910 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2911 ierr = PetscFree(array);CHKERRQ(ierr); 2912 } 2913 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2914 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2915 2916 /* project if needed */ 2917 if (pcbddc->benign_change_explicit) { 2918 Mat M; 2919 2920 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2921 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2922 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2923 ierr = MatDestroy(&M);CHKERRQ(ierr); 2924 } 2925 /* store global idxs for p0 */ 2926 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2927 } 2928 *zerodiaglocal = zerodiag; 2929 PetscFunctionReturn(0); 2930 } 2931 2932 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2933 { 2934 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2935 PetscScalar *array; 2936 PetscErrorCode ierr; 2937 2938 PetscFunctionBegin; 2939 if (!pcbddc->benign_sf) { 2940 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2941 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2942 } 2943 if (get) { 2944 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2945 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2946 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2947 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2948 } else { 2949 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2950 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2951 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2952 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2953 } 2954 PetscFunctionReturn(0); 2955 } 2956 2957 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2958 { 2959 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2960 PetscErrorCode ierr; 2961 2962 PetscFunctionBegin; 2963 /* TODO: add error checking 2964 - avoid nested pop (or push) calls. 2965 - cannot push before pop. 2966 - cannot call this if pcbddc->local_mat is NULL 2967 */ 2968 if (!pcbddc->benign_n) { 2969 PetscFunctionReturn(0); 2970 } 2971 if (pop) { 2972 if (pcbddc->benign_change_explicit) { 2973 IS is_p0; 2974 MatReuse reuse; 2975 2976 /* extract B_0 */ 2977 reuse = MAT_INITIAL_MATRIX; 2978 if (pcbddc->benign_B0) { 2979 reuse = MAT_REUSE_MATRIX; 2980 } 2981 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2982 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2983 /* remove rows and cols from local problem */ 2984 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2985 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2986 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2987 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2988 } else { 2989 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2990 PetscScalar *vals; 2991 PetscInt i,n,*idxs_ins; 2992 2993 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2994 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2995 if (!pcbddc->benign_B0) { 2996 PetscInt *nnz; 2997 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2998 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2999 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 3000 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 3001 for (i=0;i<pcbddc->benign_n;i++) { 3002 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 3003 nnz[i] = n - nnz[i]; 3004 } 3005 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 3006 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 3007 ierr = PetscFree(nnz);CHKERRQ(ierr); 3008 } 3009 3010 for (i=0;i<pcbddc->benign_n;i++) { 3011 PetscScalar *array; 3012 PetscInt *idxs,j,nz,cum; 3013 3014 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 3015 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3016 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3017 for (j=0;j<nz;j++) vals[j] = 1.; 3018 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 3019 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 3020 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 3021 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 3022 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 3023 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3024 cum = 0; 3025 for (j=0;j<n;j++) { 3026 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3027 vals[cum] = array[j]; 3028 idxs_ins[cum] = j; 3029 cum++; 3030 } 3031 } 3032 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3033 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3034 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3035 } 3036 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3037 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3038 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3039 } 3040 } else { /* push */ 3041 if (pcbddc->benign_change_explicit) { 3042 PetscInt i; 3043 3044 for (i=0;i<pcbddc->benign_n;i++) { 3045 PetscScalar *B0_vals; 3046 PetscInt *B0_cols,B0_ncol; 3047 3048 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3049 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3050 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3051 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3052 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3053 } 3054 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3055 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3056 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 3057 } 3058 PetscFunctionReturn(0); 3059 } 3060 3061 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3062 { 3063 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3064 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3065 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3066 PetscBLASInt *B_iwork,*B_ifail; 3067 PetscScalar *work,lwork; 3068 PetscScalar *St,*S,*eigv; 3069 PetscScalar *Sarray,*Starray; 3070 PetscReal *eigs,thresh,lthresh,uthresh; 3071 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3072 PetscBool allocated_S_St; 3073 #if defined(PETSC_USE_COMPLEX) 3074 PetscReal *rwork; 3075 #endif 3076 PetscErrorCode ierr; 3077 3078 PetscFunctionBegin; 3079 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3080 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3081 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3082 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3083 3084 if (pcbddc->dbg_flag) { 3085 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3087 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3088 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3089 } 3090 3091 if (pcbddc->dbg_flag) { 3092 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3093 } 3094 3095 /* max size of subsets */ 3096 mss = 0; 3097 for (i=0;i<sub_schurs->n_subs;i++) { 3098 PetscInt subset_size; 3099 3100 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3101 mss = PetscMax(mss,subset_size); 3102 } 3103 3104 /* min/max and threshold */ 3105 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3106 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3107 nmax = PetscMax(nmin,nmax); 3108 allocated_S_St = PETSC_FALSE; 3109 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3110 allocated_S_St = PETSC_TRUE; 3111 } 3112 3113 /* allocate lapack workspace */ 3114 cum = cum2 = 0; 3115 maxneigs = 0; 3116 for (i=0;i<sub_schurs->n_subs;i++) { 3117 PetscInt n,subset_size; 3118 3119 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3120 n = PetscMin(subset_size,nmax); 3121 cum += subset_size; 3122 cum2 += subset_size*n; 3123 maxneigs = PetscMax(maxneigs,n); 3124 } 3125 if (mss) { 3126 if (sub_schurs->is_symmetric) { 3127 PetscBLASInt B_itype = 1; 3128 PetscBLASInt B_N = mss; 3129 PetscReal zero = 0.0; 3130 PetscReal eps = 0.0; /* dlamch? */ 3131 3132 B_lwork = -1; 3133 S = NULL; 3134 St = NULL; 3135 eigs = NULL; 3136 eigv = NULL; 3137 B_iwork = NULL; 3138 B_ifail = NULL; 3139 #if defined(PETSC_USE_COMPLEX) 3140 rwork = NULL; 3141 #endif 3142 thresh = 1.0; 3143 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3144 #if defined(PETSC_USE_COMPLEX) 3145 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3146 #else 3147 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3148 #endif 3149 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3150 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3151 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3152 } else { 3153 lwork = 0; 3154 } 3155 3156 nv = 0; 3157 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3158 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3159 } 3160 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3161 if (allocated_S_St) { 3162 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3163 } 3164 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3165 #if defined(PETSC_USE_COMPLEX) 3166 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3167 #endif 3168 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3169 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3170 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3171 nv+cum,&pcbddc->adaptive_constraints_idxs, 3172 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3173 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3174 3175 maxneigs = 0; 3176 cum = cumarray = 0; 3177 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3178 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3179 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3180 const PetscInt *idxs; 3181 3182 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3183 for (cum=0;cum<nv;cum++) { 3184 pcbddc->adaptive_constraints_n[cum] = 1; 3185 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3186 pcbddc->adaptive_constraints_data[cum] = 1.0; 3187 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3188 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3189 } 3190 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3191 } 3192 3193 if (mss) { /* multilevel */ 3194 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3195 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3196 } 3197 3198 lthresh = pcbddc->adaptive_threshold[0]; 3199 uthresh = pcbddc->adaptive_threshold[1]; 3200 for (i=0;i<sub_schurs->n_subs;i++) { 3201 const PetscInt *idxs; 3202 PetscReal upper,lower; 3203 PetscInt j,subset_size,eigs_start = 0; 3204 PetscBLASInt B_N; 3205 PetscBool same_data = PETSC_FALSE; 3206 PetscBool scal = PETSC_FALSE; 3207 3208 if (pcbddc->use_deluxe_scaling) { 3209 upper = PETSC_MAX_REAL; 3210 lower = uthresh; 3211 } else { 3212 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3213 upper = 1./uthresh; 3214 lower = 0.; 3215 } 3216 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3217 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3218 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3219 /* this is experimental: we assume the dofs have been properly grouped to have 3220 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3221 if (!sub_schurs->is_posdef) { 3222 Mat T; 3223 3224 for (j=0;j<subset_size;j++) { 3225 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3226 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3227 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3228 ierr = MatDestroy(&T);CHKERRQ(ierr); 3229 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3230 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3231 ierr = MatDestroy(&T);CHKERRQ(ierr); 3232 if (sub_schurs->change_primal_sub) { 3233 PetscInt nz,k; 3234 const PetscInt *idxs; 3235 3236 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3237 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3238 for (k=0;k<nz;k++) { 3239 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3240 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3241 } 3242 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3243 } 3244 scal = PETSC_TRUE; 3245 break; 3246 } 3247 } 3248 } 3249 3250 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3251 if (sub_schurs->is_symmetric) { 3252 PetscInt j,k; 3253 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3254 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3255 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3256 } 3257 for (j=0;j<subset_size;j++) { 3258 for (k=j;k<subset_size;k++) { 3259 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3260 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3261 } 3262 } 3263 } else { 3264 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3265 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3266 } 3267 } else { 3268 S = Sarray + cumarray; 3269 St = Starray + cumarray; 3270 } 3271 /* see if we can save some work */ 3272 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3273 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3274 } 3275 3276 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3277 B_neigs = 0; 3278 } else { 3279 if (sub_schurs->is_symmetric) { 3280 PetscBLASInt B_itype = 1; 3281 PetscBLASInt B_IL, B_IU; 3282 PetscReal eps = -1.0; /* dlamch? */ 3283 PetscInt nmin_s; 3284 PetscBool compute_range; 3285 3286 B_neigs = 0; 3287 compute_range = (PetscBool)!same_data; 3288 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3289 3290 if (pcbddc->dbg_flag) { 3291 PetscInt nc = 0; 3292 3293 if (sub_schurs->change_primal_sub) { 3294 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3295 } 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3297 } 3298 3299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3300 if (compute_range) { 3301 3302 /* ask for eigenvalues larger than thresh */ 3303 if (sub_schurs->is_posdef) { 3304 #if defined(PETSC_USE_COMPLEX) 3305 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3306 #else 3307 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3308 #endif 3309 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3310 } else { /* no theory so far, but it works nicely */ 3311 PetscInt recipe = 0,recipe_m = 1; 3312 PetscReal bb[2]; 3313 3314 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3315 switch (recipe) { 3316 case 0: 3317 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3318 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3321 #else 3322 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3323 #endif 3324 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3325 break; 3326 case 1: 3327 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3328 #if defined(PETSC_USE_COMPLEX) 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3330 #else 3331 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3332 #endif 3333 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3334 if (!scal) { 3335 PetscBLASInt B_neigs2 = 0; 3336 3337 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3338 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3339 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3340 #if defined(PETSC_USE_COMPLEX) 3341 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3342 #else 3343 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3344 #endif 3345 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3346 B_neigs += B_neigs2; 3347 } 3348 break; 3349 case 2: 3350 if (scal) { 3351 bb[0] = PETSC_MIN_REAL; 3352 bb[1] = 0; 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3355 #else 3356 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3357 #endif 3358 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3359 } else { 3360 PetscBLASInt B_neigs2 = 0; 3361 PetscBool import = PETSC_FALSE; 3362 3363 lthresh = PetscMax(lthresh,0.0); 3364 if (lthresh > 0.0) { 3365 bb[0] = PETSC_MIN_REAL; 3366 bb[1] = lthresh*lthresh; 3367 3368 import = PETSC_TRUE; 3369 #if defined(PETSC_USE_COMPLEX) 3370 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3371 #else 3372 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3373 #endif 3374 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3375 } 3376 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3377 bb[1] = PETSC_MAX_REAL; 3378 if (import) { 3379 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3380 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3381 } 3382 #if defined(PETSC_USE_COMPLEX) 3383 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3384 #else 3385 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3386 #endif 3387 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 3: 3392 if (scal) { 3393 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3394 } else { 3395 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3396 } 3397 if (!scal) { 3398 bb[0] = uthresh; 3399 bb[1] = PETSC_MAX_REAL; 3400 #if defined(PETSC_USE_COMPLEX) 3401 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3402 #else 3403 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3404 #endif 3405 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3406 } 3407 if (recipe_m > 0 && B_N - B_neigs > 0) { 3408 PetscBLASInt B_neigs2 = 0; 3409 3410 B_IL = 1; 3411 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3412 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3413 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3414 #if defined(PETSC_USE_COMPLEX) 3415 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3416 #else 3417 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3418 #endif 3419 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3420 B_neigs += B_neigs2; 3421 } 3422 break; 3423 case 4: 3424 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3425 #if defined(PETSC_USE_COMPLEX) 3426 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3427 #else 3428 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3429 #endif 3430 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3431 { 3432 PetscBLASInt B_neigs2 = 0; 3433 3434 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3435 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3436 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3437 #if defined(PETSC_USE_COMPLEX) 3438 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3439 #else 3440 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3441 #endif 3442 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3443 B_neigs += B_neigs2; 3444 } 3445 break; 3446 case 5: /* same as before: first compute all eigenvalues, then filter */ 3447 #if defined(PETSC_USE_COMPLEX) 3448 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3449 #else 3450 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3451 #endif 3452 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3453 { 3454 PetscInt e,k,ne; 3455 for (e=0,ne=0;e<B_neigs;e++) { 3456 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3457 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3458 eigs[ne] = eigs[e]; 3459 ne++; 3460 } 3461 } 3462 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3463 B_neigs = ne; 3464 } 3465 break; 3466 default: 3467 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3468 break; 3469 } 3470 } 3471 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3472 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3473 B_IL = 1; 3474 #if defined(PETSC_USE_COMPLEX) 3475 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3476 #else 3477 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3478 #endif 3479 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3480 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3481 PetscInt k; 3482 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3483 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3484 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3485 nmin = nmax; 3486 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3487 for (k=0;k<nmax;k++) { 3488 eigs[k] = 1./PETSC_SMALL; 3489 eigv[k*(subset_size+1)] = 1.0; 3490 } 3491 } 3492 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3493 if (B_ierr) { 3494 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3495 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3496 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3497 } 3498 3499 if (B_neigs > nmax) { 3500 if (pcbddc->dbg_flag) { 3501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3502 } 3503 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3504 B_neigs = nmax; 3505 } 3506 3507 nmin_s = PetscMin(nmin,B_N); 3508 if (B_neigs < nmin_s) { 3509 PetscBLASInt B_neigs2 = 0; 3510 3511 if (pcbddc->use_deluxe_scaling) { 3512 if (scal) { 3513 B_IU = nmin_s; 3514 B_IL = B_neigs + 1; 3515 } else { 3516 B_IL = B_N - nmin_s + 1; 3517 B_IU = B_N - B_neigs; 3518 } 3519 } else { 3520 B_IL = B_neigs + 1; 3521 B_IU = nmin_s; 3522 } 3523 if (pcbddc->dbg_flag) { 3524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3525 } 3526 if (sub_schurs->is_symmetric) { 3527 PetscInt j,k; 3528 for (j=0;j<subset_size;j++) { 3529 for (k=j;k<subset_size;k++) { 3530 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3531 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3532 } 3533 } 3534 } else { 3535 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3536 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3537 } 3538 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3539 #if defined(PETSC_USE_COMPLEX) 3540 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3541 #else 3542 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3543 #endif 3544 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3545 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3546 B_neigs += B_neigs2; 3547 } 3548 if (B_ierr) { 3549 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3550 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3551 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3552 } 3553 if (pcbddc->dbg_flag) { 3554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3555 for (j=0;j<B_neigs;j++) { 3556 if (eigs[j] == 0.0) { 3557 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3558 } else { 3559 if (pcbddc->use_deluxe_scaling) { 3560 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3561 } else { 3562 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3563 } 3564 } 3565 } 3566 } 3567 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3568 } 3569 /* change the basis back to the original one */ 3570 if (sub_schurs->change) { 3571 Mat change,phi,phit; 3572 3573 if (pcbddc->dbg_flag > 2) { 3574 PetscInt ii; 3575 for (ii=0;ii<B_neigs;ii++) { 3576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3577 for (j=0;j<B_N;j++) { 3578 #if defined(PETSC_USE_COMPLEX) 3579 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3580 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3582 #else 3583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3584 #endif 3585 } 3586 } 3587 } 3588 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3589 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3590 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3591 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3592 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3593 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3594 } 3595 maxneigs = PetscMax(B_neigs,maxneigs); 3596 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3597 if (B_neigs) { 3598 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3599 3600 if (pcbddc->dbg_flag > 1) { 3601 PetscInt ii; 3602 for (ii=0;ii<B_neigs;ii++) { 3603 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3604 for (j=0;j<B_N;j++) { 3605 #if defined(PETSC_USE_COMPLEX) 3606 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3607 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3608 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3609 #else 3610 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3611 #endif 3612 } 3613 } 3614 } 3615 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3616 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3617 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3618 cum++; 3619 } 3620 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3621 /* shift for next computation */ 3622 cumarray += subset_size*subset_size; 3623 } 3624 if (pcbddc->dbg_flag) { 3625 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3626 } 3627 3628 if (mss) { 3629 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3630 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3631 /* destroy matrices (junk) */ 3632 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3633 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3634 } 3635 if (allocated_S_St) { 3636 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3637 } 3638 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3639 #if defined(PETSC_USE_COMPLEX) 3640 ierr = PetscFree(rwork);CHKERRQ(ierr); 3641 #endif 3642 if (pcbddc->dbg_flag) { 3643 PetscInt maxneigs_r; 3644 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3645 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3646 } 3647 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3648 PetscFunctionReturn(0); 3649 } 3650 3651 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3652 { 3653 PetscScalar *coarse_submat_vals; 3654 PetscErrorCode ierr; 3655 3656 PetscFunctionBegin; 3657 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3658 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3659 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3660 3661 /* Setup local neumann solver ksp_R */ 3662 /* PCBDDCSetUpLocalScatters should be called first! */ 3663 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3664 3665 /* 3666 Setup local correction and local part of coarse basis. 3667 Gives back the dense local part of the coarse matrix in column major ordering 3668 */ 3669 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3670 3671 /* Compute total number of coarse nodes and setup coarse solver */ 3672 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3673 3674 /* free */ 3675 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3676 PetscFunctionReturn(0); 3677 } 3678 3679 PetscErrorCode PCBDDCResetCustomization(PC pc) 3680 { 3681 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3682 PetscErrorCode ierr; 3683 3684 PetscFunctionBegin; 3685 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3686 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3687 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3688 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3689 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3690 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3691 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3692 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3693 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3694 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3695 PetscFunctionReturn(0); 3696 } 3697 3698 PetscErrorCode PCBDDCResetTopography(PC pc) 3699 { 3700 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3701 PetscInt i; 3702 PetscErrorCode ierr; 3703 3704 PetscFunctionBegin; 3705 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3706 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3707 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3708 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3709 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3710 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3711 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3712 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3713 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3714 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3715 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3716 for (i=0;i<pcbddc->n_local_subs;i++) { 3717 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3718 } 3719 pcbddc->n_local_subs = 0; 3720 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3721 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3722 pcbddc->graphanalyzed = PETSC_FALSE; 3723 pcbddc->recompute_topography = PETSC_TRUE; 3724 pcbddc->corner_selected = PETSC_FALSE; 3725 PetscFunctionReturn(0); 3726 } 3727 3728 PetscErrorCode PCBDDCResetSolvers(PC pc) 3729 { 3730 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3731 PetscErrorCode ierr; 3732 3733 PetscFunctionBegin; 3734 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3735 if (pcbddc->coarse_phi_B) { 3736 PetscScalar *array; 3737 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3738 ierr = PetscFree(array);CHKERRQ(ierr); 3739 } 3740 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3741 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3742 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3743 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3744 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3745 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3746 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3747 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3748 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3749 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3750 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3751 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3752 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3753 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3754 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3755 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3756 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3757 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3758 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3759 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3760 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3761 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3762 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3763 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3764 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3765 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3766 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3767 if (pcbddc->benign_zerodiag_subs) { 3768 PetscInt i; 3769 for (i=0;i<pcbddc->benign_n;i++) { 3770 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3771 } 3772 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3773 } 3774 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3775 PetscFunctionReturn(0); 3776 } 3777 3778 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3779 { 3780 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3781 PC_IS *pcis = (PC_IS*)pc->data; 3782 VecType impVecType; 3783 PetscInt n_constraints,n_R,old_size; 3784 PetscErrorCode ierr; 3785 3786 PetscFunctionBegin; 3787 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3788 n_R = pcis->n - pcbddc->n_vertices; 3789 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3790 /* local work vectors (try to avoid unneeded work)*/ 3791 /* R nodes */ 3792 old_size = -1; 3793 if (pcbddc->vec1_R) { 3794 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3795 } 3796 if (n_R != old_size) { 3797 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3798 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3799 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3800 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3801 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3802 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3803 } 3804 /* local primal dofs */ 3805 old_size = -1; 3806 if (pcbddc->vec1_P) { 3807 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3808 } 3809 if (pcbddc->local_primal_size != old_size) { 3810 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3811 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3812 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3813 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3814 } 3815 /* local explicit constraints */ 3816 old_size = -1; 3817 if (pcbddc->vec1_C) { 3818 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3819 } 3820 if (n_constraints && n_constraints != old_size) { 3821 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3822 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3823 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3824 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3825 } 3826 PetscFunctionReturn(0); 3827 } 3828 3829 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3830 { 3831 PetscErrorCode ierr; 3832 /* pointers to pcis and pcbddc */ 3833 PC_IS* pcis = (PC_IS*)pc->data; 3834 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3835 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3836 /* submatrices of local problem */ 3837 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3838 /* submatrices of local coarse problem */ 3839 Mat S_VV,S_CV,S_VC,S_CC; 3840 /* working matrices */ 3841 Mat C_CR; 3842 /* additional working stuff */ 3843 PC pc_R; 3844 Mat F,Brhs = NULL; 3845 Vec dummy_vec; 3846 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3847 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3848 PetscScalar *work; 3849 PetscInt *idx_V_B; 3850 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3851 PetscInt i,n_R,n_D,n_B; 3852 3853 /* some shortcuts to scalars */ 3854 PetscScalar one=1.0,m_one=-1.0; 3855 3856 PetscFunctionBegin; 3857 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3858 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3859 3860 /* Set Non-overlapping dimensions */ 3861 n_vertices = pcbddc->n_vertices; 3862 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3863 n_B = pcis->n_B; 3864 n_D = pcis->n - n_B; 3865 n_R = pcis->n - n_vertices; 3866 3867 /* vertices in boundary numbering */ 3868 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3869 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3870 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3871 3872 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3873 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3875 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3876 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3877 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3878 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3879 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3880 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3881 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3882 3883 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3884 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3885 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3886 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3887 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3888 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3889 lda_rhs = n_R; 3890 need_benign_correction = PETSC_FALSE; 3891 if (isLU || isILU || isCHOL) { 3892 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3893 } else if (sub_schurs && sub_schurs->reuse_solver) { 3894 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3895 MatFactorType type; 3896 3897 F = reuse_solver->F; 3898 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3899 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3900 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3901 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3902 } else { 3903 F = NULL; 3904 } 3905 3906 /* determine if we can use a sparse right-hand side */ 3907 sparserhs = PETSC_FALSE; 3908 if (F) { 3909 MatSolverType solver; 3910 3911 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3912 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3913 } 3914 3915 /* allocate workspace */ 3916 n = 0; 3917 if (n_constraints) { 3918 n += lda_rhs*n_constraints; 3919 } 3920 if (n_vertices) { 3921 n = PetscMax(2*lda_rhs*n_vertices,n); 3922 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3923 } 3924 if (!pcbddc->symmetric_primal) { 3925 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3926 } 3927 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3928 3929 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3930 dummy_vec = NULL; 3931 if (need_benign_correction && lda_rhs != n_R && F) { 3932 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3933 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3934 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3935 } 3936 3937 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3938 if (n_constraints) { 3939 Mat M3,C_B; 3940 IS is_aux; 3941 PetscScalar *array,*array2; 3942 3943 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3944 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3945 3946 /* Extract constraints on R nodes: C_{CR} */ 3947 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3948 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3949 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3950 3951 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3952 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3953 if (!sparserhs) { 3954 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3955 for (i=0;i<n_constraints;i++) { 3956 const PetscScalar *row_cmat_values; 3957 const PetscInt *row_cmat_indices; 3958 PetscInt size_of_constraint,j; 3959 3960 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3961 for (j=0;j<size_of_constraint;j++) { 3962 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3963 } 3964 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3965 } 3966 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3967 } else { 3968 Mat tC_CR; 3969 3970 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3971 if (lda_rhs != n_R) { 3972 PetscScalar *aa; 3973 PetscInt r,*ii,*jj; 3974 PetscBool done; 3975 3976 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3977 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3978 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3979 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3980 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3981 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3982 } else { 3983 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3984 tC_CR = C_CR; 3985 } 3986 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3987 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3988 } 3989 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3990 if (F) { 3991 if (need_benign_correction) { 3992 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3993 3994 /* rhs is already zero on interior dofs, no need to change the rhs */ 3995 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3996 } 3997 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3998 if (need_benign_correction) { 3999 PetscScalar *marr; 4000 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4001 4002 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4003 if (lda_rhs != n_R) { 4004 for (i=0;i<n_constraints;i++) { 4005 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4006 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4007 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4008 } 4009 } else { 4010 for (i=0;i<n_constraints;i++) { 4011 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4012 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4013 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4014 } 4015 } 4016 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4017 } 4018 } else { 4019 PetscScalar *marr; 4020 4021 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4022 for (i=0;i<n_constraints;i++) { 4023 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4024 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 4025 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4026 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4027 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4028 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4029 } 4030 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 4031 } 4032 if (sparserhs) { 4033 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 4034 } 4035 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4036 if (!pcbddc->switch_static) { 4037 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4038 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4039 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4040 for (i=0;i<n_constraints;i++) { 4041 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 4042 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 4043 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4044 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4045 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4046 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4047 } 4048 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 4049 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 4050 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4051 } else { 4052 if (lda_rhs != n_R) { 4053 IS dummy; 4054 4055 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 4056 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 4057 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 4058 } else { 4059 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 4060 pcbddc->local_auxmat2 = local_auxmat2_R; 4061 } 4062 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4063 } 4064 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4065 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4066 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4067 if (isCHOL) { 4068 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4069 } else { 4070 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4071 } 4072 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4073 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4074 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4075 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4076 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4077 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4078 } 4079 4080 /* Get submatrices from subdomain matrix */ 4081 if (n_vertices) { 4082 IS is_aux; 4083 PetscBool isseqaij; 4084 4085 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4086 IS tis; 4087 4088 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4089 ierr = ISSort(tis);CHKERRQ(ierr); 4090 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4091 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4092 } else { 4093 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4094 } 4095 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4096 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4097 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4098 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4099 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4100 } 4101 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4102 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4103 } 4104 4105 /* Matrix of coarse basis functions (local) */ 4106 if (pcbddc->coarse_phi_B) { 4107 PetscInt on_B,on_primal,on_D=n_D; 4108 if (pcbddc->coarse_phi_D) { 4109 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4110 } 4111 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4112 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4113 PetscScalar *marray; 4114 4115 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4116 ierr = PetscFree(marray);CHKERRQ(ierr); 4117 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4118 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4119 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4120 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4121 } 4122 } 4123 4124 if (!pcbddc->coarse_phi_B) { 4125 PetscScalar *marr; 4126 4127 /* memory size */ 4128 n = n_B*pcbddc->local_primal_size; 4129 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4130 if (!pcbddc->symmetric_primal) n *= 2; 4131 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4132 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4133 marr += n_B*pcbddc->local_primal_size; 4134 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4135 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4136 marr += n_D*pcbddc->local_primal_size; 4137 } 4138 if (!pcbddc->symmetric_primal) { 4139 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4140 marr += n_B*pcbddc->local_primal_size; 4141 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4142 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4143 } 4144 } else { 4145 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4146 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4147 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4148 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4149 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4150 } 4151 } 4152 } 4153 4154 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4155 p0_lidx_I = NULL; 4156 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4157 const PetscInt *idxs; 4158 4159 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4160 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4161 for (i=0;i<pcbddc->benign_n;i++) { 4162 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4163 } 4164 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4165 } 4166 4167 /* vertices */ 4168 if (n_vertices) { 4169 PetscBool restoreavr = PETSC_FALSE; 4170 4171 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4172 4173 if (n_R) { 4174 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4175 PetscBLASInt B_N,B_one = 1; 4176 PetscScalar *x,*y; 4177 4178 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4179 if (need_benign_correction) { 4180 ISLocalToGlobalMapping RtoN; 4181 IS is_p0; 4182 PetscInt *idxs_p0,n; 4183 4184 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4185 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4186 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4187 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4188 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4189 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4190 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4191 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4192 } 4193 4194 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4195 if (!sparserhs || need_benign_correction) { 4196 if (lda_rhs == n_R) { 4197 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4198 } else { 4199 PetscScalar *av,*array; 4200 const PetscInt *xadj,*adjncy; 4201 PetscInt n; 4202 PetscBool flg_row; 4203 4204 array = work+lda_rhs*n_vertices; 4205 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4206 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4207 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4208 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4209 for (i=0;i<n;i++) { 4210 PetscInt j; 4211 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4212 } 4213 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4214 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4215 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4216 } 4217 if (need_benign_correction) { 4218 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4219 PetscScalar *marr; 4220 4221 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4222 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4223 4224 | 0 0 0 | (V) 4225 L = | 0 0 -1 | (P-p0) 4226 | 0 0 -1 | (p0) 4227 4228 */ 4229 for (i=0;i<reuse_solver->benign_n;i++) { 4230 const PetscScalar *vals; 4231 const PetscInt *idxs,*idxs_zero; 4232 PetscInt n,j,nz; 4233 4234 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4235 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4236 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4237 for (j=0;j<n;j++) { 4238 PetscScalar val = vals[j]; 4239 PetscInt k,col = idxs[j]; 4240 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4241 } 4242 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4243 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4244 } 4245 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4246 } 4247 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4248 Brhs = A_RV; 4249 } else { 4250 Mat tA_RVT,A_RVT; 4251 4252 if (!pcbddc->symmetric_primal) { 4253 /* A_RV already scaled by -1 */ 4254 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4255 } else { 4256 restoreavr = PETSC_TRUE; 4257 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4258 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4259 A_RVT = A_VR; 4260 } 4261 if (lda_rhs != n_R) { 4262 PetscScalar *aa; 4263 PetscInt r,*ii,*jj; 4264 PetscBool done; 4265 4266 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4267 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4268 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4269 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4270 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4271 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4272 } else { 4273 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4274 tA_RVT = A_RVT; 4275 } 4276 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4277 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4278 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4279 } 4280 if (F) { 4281 /* need to correct the rhs */ 4282 if (need_benign_correction) { 4283 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4284 PetscScalar *marr; 4285 4286 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4287 if (lda_rhs != n_R) { 4288 for (i=0;i<n_vertices;i++) { 4289 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4290 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4291 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4292 } 4293 } else { 4294 for (i=0;i<n_vertices;i++) { 4295 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4296 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4297 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4298 } 4299 } 4300 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4301 } 4302 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4303 if (restoreavr) { 4304 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4305 } 4306 /* need to correct the solution */ 4307 if (need_benign_correction) { 4308 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4309 PetscScalar *marr; 4310 4311 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4312 if (lda_rhs != n_R) { 4313 for (i=0;i<n_vertices;i++) { 4314 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4315 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4316 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4317 } 4318 } else { 4319 for (i=0;i<n_vertices;i++) { 4320 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4321 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4322 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4323 } 4324 } 4325 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4326 } 4327 } else { 4328 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4329 for (i=0;i<n_vertices;i++) { 4330 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4331 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4332 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4333 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4334 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4335 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4336 } 4337 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4338 } 4339 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4340 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4341 /* S_VV and S_CV */ 4342 if (n_constraints) { 4343 Mat B; 4344 4345 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4346 for (i=0;i<n_vertices;i++) { 4347 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4348 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4349 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4350 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4351 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4352 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4353 } 4354 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4355 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4356 ierr = MatDestroy(&B);CHKERRQ(ierr); 4357 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4358 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4359 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4360 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4361 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4362 ierr = MatDestroy(&B);CHKERRQ(ierr); 4363 } 4364 if (lda_rhs != n_R) { 4365 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4366 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4367 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4368 } 4369 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4370 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4371 if (need_benign_correction) { 4372 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4373 PetscScalar *marr,*sums; 4374 4375 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4376 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4377 for (i=0;i<reuse_solver->benign_n;i++) { 4378 const PetscScalar *vals; 4379 const PetscInt *idxs,*idxs_zero; 4380 PetscInt n,j,nz; 4381 4382 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4383 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4384 for (j=0;j<n_vertices;j++) { 4385 PetscInt k; 4386 sums[j] = 0.; 4387 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4388 } 4389 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4390 for (j=0;j<n;j++) { 4391 PetscScalar val = vals[j]; 4392 PetscInt k; 4393 for (k=0;k<n_vertices;k++) { 4394 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4395 } 4396 } 4397 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4398 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4399 } 4400 ierr = PetscFree(sums);CHKERRQ(ierr); 4401 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4402 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4403 } 4404 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4405 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4406 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4407 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4408 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4409 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4410 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4411 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4412 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4413 } else { 4414 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4415 } 4416 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4417 4418 /* coarse basis functions */ 4419 for (i=0;i<n_vertices;i++) { 4420 PetscScalar *y; 4421 4422 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4423 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4424 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4425 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4426 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4427 y[n_B*i+idx_V_B[i]] = 1.0; 4428 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4429 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4430 4431 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4432 PetscInt j; 4433 4434 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4435 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4436 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4437 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4438 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4439 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4440 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4441 } 4442 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4443 } 4444 /* if n_R == 0 the object is not destroyed */ 4445 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4446 } 4447 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4448 4449 if (n_constraints) { 4450 Mat B; 4451 4452 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4453 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4454 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4455 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4456 if (n_vertices) { 4457 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4458 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4459 } else { 4460 Mat S_VCt; 4461 4462 if (lda_rhs != n_R) { 4463 ierr = MatDestroy(&B);CHKERRQ(ierr); 4464 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4465 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4466 } 4467 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4468 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4469 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4470 } 4471 } 4472 ierr = MatDestroy(&B);CHKERRQ(ierr); 4473 /* coarse basis functions */ 4474 for (i=0;i<n_constraints;i++) { 4475 PetscScalar *y; 4476 4477 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4478 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4479 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4480 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4481 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4482 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4483 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4484 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4485 PetscInt j; 4486 4487 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4488 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4489 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4490 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4491 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4492 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4493 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4494 } 4495 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4496 } 4497 } 4498 if (n_constraints) { 4499 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4500 } 4501 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4502 4503 /* coarse matrix entries relative to B_0 */ 4504 if (pcbddc->benign_n) { 4505 Mat B0_B,B0_BPHI; 4506 IS is_dummy; 4507 PetscScalar *data; 4508 PetscInt j; 4509 4510 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4511 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4512 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4513 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4514 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4515 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4516 for (j=0;j<pcbddc->benign_n;j++) { 4517 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4518 for (i=0;i<pcbddc->local_primal_size;i++) { 4519 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4520 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4521 } 4522 } 4523 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4524 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4525 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4526 } 4527 4528 /* compute other basis functions for non-symmetric problems */ 4529 if (!pcbddc->symmetric_primal) { 4530 Mat B_V=NULL,B_C=NULL; 4531 PetscScalar *marray; 4532 4533 if (n_constraints) { 4534 Mat S_CCT,C_CRT; 4535 4536 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4537 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4538 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4539 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4540 if (n_vertices) { 4541 Mat S_VCT; 4542 4543 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4544 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4545 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4546 } 4547 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4548 } else { 4549 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4550 } 4551 if (n_vertices && n_R) { 4552 PetscScalar *av,*marray; 4553 const PetscInt *xadj,*adjncy; 4554 PetscInt n; 4555 PetscBool flg_row; 4556 4557 /* B_V = B_V - A_VR^T */ 4558 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4559 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4560 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4561 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4562 for (i=0;i<n;i++) { 4563 PetscInt j; 4564 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4565 } 4566 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4567 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4568 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4569 } 4570 4571 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4572 if (n_vertices) { 4573 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4574 for (i=0;i<n_vertices;i++) { 4575 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4576 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4577 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4578 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4579 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4580 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4581 } 4582 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4583 } 4584 if (B_C) { 4585 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4586 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4587 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4588 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4589 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4590 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 4591 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4592 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4593 } 4594 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4595 } 4596 /* coarse basis functions */ 4597 for (i=0;i<pcbddc->local_primal_size;i++) { 4598 PetscScalar *y; 4599 4600 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4601 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4602 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4603 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4604 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4605 if (i<n_vertices) { 4606 y[n_B*i+idx_V_B[i]] = 1.0; 4607 } 4608 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4609 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4610 4611 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4612 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4613 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4614 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4615 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4616 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4617 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4618 } 4619 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4620 } 4621 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4622 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4623 } 4624 4625 /* free memory */ 4626 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4627 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4628 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4629 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4630 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4631 ierr = PetscFree(work);CHKERRQ(ierr); 4632 if (n_vertices) { 4633 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4634 } 4635 if (n_constraints) { 4636 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4637 } 4638 /* Checking coarse_sub_mat and coarse basis functios */ 4639 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4640 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4641 if (pcbddc->dbg_flag) { 4642 Mat coarse_sub_mat; 4643 Mat AUXMAT,TM1,TM2,TM3,TM4; 4644 Mat coarse_phi_D,coarse_phi_B; 4645 Mat coarse_psi_D,coarse_psi_B; 4646 Mat A_II,A_BB,A_IB,A_BI; 4647 Mat C_B,CPHI; 4648 IS is_dummy; 4649 Vec mones; 4650 MatType checkmattype=MATSEQAIJ; 4651 PetscReal real_value; 4652 4653 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4654 Mat A; 4655 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4656 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4657 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4658 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4659 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4660 ierr = MatDestroy(&A);CHKERRQ(ierr); 4661 } else { 4662 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4663 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4664 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4665 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4666 } 4667 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4668 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4669 if (!pcbddc->symmetric_primal) { 4670 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4671 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4672 } 4673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4674 4675 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4676 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4677 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4678 if (!pcbddc->symmetric_primal) { 4679 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4680 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4681 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4682 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4683 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4684 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4685 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4686 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4687 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4688 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4689 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4690 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4691 } else { 4692 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4693 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4694 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4695 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4696 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4697 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4698 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4699 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4700 } 4701 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4702 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4703 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4704 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4705 if (pcbddc->benign_n) { 4706 Mat B0_B,B0_BPHI; 4707 PetscScalar *data,*data2; 4708 PetscInt j; 4709 4710 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4711 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4712 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4713 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4714 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4715 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4716 for (j=0;j<pcbddc->benign_n;j++) { 4717 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4718 for (i=0;i<pcbddc->local_primal_size;i++) { 4719 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4720 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4721 } 4722 } 4723 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4724 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4725 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4726 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4727 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4728 } 4729 #if 0 4730 { 4731 PetscViewer viewer; 4732 char filename[256]; 4733 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4734 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4735 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4736 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4737 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4738 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4739 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4740 if (pcbddc->coarse_phi_B) { 4741 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4742 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4743 } 4744 if (pcbddc->coarse_phi_D) { 4745 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4746 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4747 } 4748 if (pcbddc->coarse_psi_B) { 4749 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4750 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4751 } 4752 if (pcbddc->coarse_psi_D) { 4753 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4754 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4755 } 4756 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4757 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4758 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4759 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4760 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4761 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4762 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4763 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4764 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4765 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4766 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4767 } 4768 #endif 4769 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4770 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4771 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4772 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4773 4774 /* check constraints */ 4775 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4776 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4777 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4778 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4779 } else { 4780 PetscScalar *data; 4781 Mat tmat; 4782 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4783 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4784 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4785 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4786 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4787 } 4788 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4789 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4790 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4791 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4792 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4793 if (!pcbddc->symmetric_primal) { 4794 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4795 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4796 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4797 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4798 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4799 } 4800 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4801 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4802 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4803 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4804 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4805 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4806 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4807 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4808 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4809 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4810 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4811 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4812 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4813 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4814 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4815 if (!pcbddc->symmetric_primal) { 4816 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4817 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4818 } 4819 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4820 } 4821 /* get back data */ 4822 *coarse_submat_vals_n = coarse_submat_vals; 4823 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4824 PetscFunctionReturn(0); 4825 } 4826 4827 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4828 { 4829 Mat *work_mat; 4830 IS isrow_s,iscol_s; 4831 PetscBool rsorted,csorted; 4832 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4833 PetscErrorCode ierr; 4834 4835 PetscFunctionBegin; 4836 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4837 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4838 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4839 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4840 4841 if (!rsorted) { 4842 const PetscInt *idxs; 4843 PetscInt *idxs_sorted,i; 4844 4845 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4846 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4847 for (i=0;i<rsize;i++) { 4848 idxs_perm_r[i] = i; 4849 } 4850 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4851 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4852 for (i=0;i<rsize;i++) { 4853 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4854 } 4855 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4856 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4857 } else { 4858 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4859 isrow_s = isrow; 4860 } 4861 4862 if (!csorted) { 4863 if (isrow == iscol) { 4864 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4865 iscol_s = isrow_s; 4866 } else { 4867 const PetscInt *idxs; 4868 PetscInt *idxs_sorted,i; 4869 4870 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4871 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4872 for (i=0;i<csize;i++) { 4873 idxs_perm_c[i] = i; 4874 } 4875 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4876 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4877 for (i=0;i<csize;i++) { 4878 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4879 } 4880 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4881 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4882 } 4883 } else { 4884 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4885 iscol_s = iscol; 4886 } 4887 4888 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4889 4890 if (!rsorted || !csorted) { 4891 Mat new_mat; 4892 IS is_perm_r,is_perm_c; 4893 4894 if (!rsorted) { 4895 PetscInt *idxs_r,i; 4896 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4897 for (i=0;i<rsize;i++) { 4898 idxs_r[idxs_perm_r[i]] = i; 4899 } 4900 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4901 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4902 } else { 4903 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4904 } 4905 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4906 4907 if (!csorted) { 4908 if (isrow_s == iscol_s) { 4909 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4910 is_perm_c = is_perm_r; 4911 } else { 4912 PetscInt *idxs_c,i; 4913 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4914 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4915 for (i=0;i<csize;i++) { 4916 idxs_c[idxs_perm_c[i]] = i; 4917 } 4918 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4919 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4920 } 4921 } else { 4922 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4923 } 4924 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4925 4926 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4927 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4928 work_mat[0] = new_mat; 4929 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4930 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4931 } 4932 4933 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4934 *B = work_mat[0]; 4935 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4936 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4937 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4938 PetscFunctionReturn(0); 4939 } 4940 4941 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4942 { 4943 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4944 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4945 Mat new_mat,lA; 4946 IS is_local,is_global; 4947 PetscInt local_size; 4948 PetscBool isseqaij; 4949 PetscErrorCode ierr; 4950 4951 PetscFunctionBegin; 4952 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4953 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4954 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4955 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4956 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4957 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4958 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4959 4960 /* check */ 4961 if (pcbddc->dbg_flag) { 4962 Vec x,x_change; 4963 PetscReal error; 4964 4965 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4966 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4967 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4968 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4969 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4970 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4971 if (!pcbddc->change_interior) { 4972 const PetscScalar *x,*y,*v; 4973 PetscReal lerror = 0.; 4974 PetscInt i; 4975 4976 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4977 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4978 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4979 for (i=0;i<local_size;i++) 4980 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4981 lerror = PetscAbsScalar(x[i]-y[i]); 4982 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4983 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4984 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4985 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4986 if (error > PETSC_SMALL) { 4987 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4988 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 4989 } else { 4990 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 4991 } 4992 } 4993 } 4994 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4995 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4996 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4997 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4998 if (error > PETSC_SMALL) { 4999 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 5000 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 5001 } else { 5002 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 5003 } 5004 } 5005 ierr = VecDestroy(&x);CHKERRQ(ierr); 5006 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 5007 } 5008 5009 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 5010 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 5011 5012 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 5013 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 5014 if (isseqaij) { 5015 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5016 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5017 if (lA) { 5018 Mat work; 5019 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5020 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5021 ierr = MatDestroy(&work);CHKERRQ(ierr); 5022 } 5023 } else { 5024 Mat work_mat; 5025 5026 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5027 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5028 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 5029 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 5030 if (lA) { 5031 Mat work; 5032 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 5033 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 5034 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 5035 ierr = MatDestroy(&work);CHKERRQ(ierr); 5036 } 5037 } 5038 if (matis->A->symmetric_set) { 5039 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 5040 #if !defined(PETSC_USE_COMPLEX) 5041 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 5042 #endif 5043 } 5044 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 5045 PetscFunctionReturn(0); 5046 } 5047 5048 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 5049 { 5050 PC_IS* pcis = (PC_IS*)(pc->data); 5051 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5052 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5053 PetscInt *idx_R_local=NULL; 5054 PetscInt n_vertices,i,j,n_R,n_D,n_B; 5055 PetscInt vbs,bs; 5056 PetscBT bitmask=NULL; 5057 PetscErrorCode ierr; 5058 5059 PetscFunctionBegin; 5060 /* 5061 No need to setup local scatters if 5062 - primal space is unchanged 5063 AND 5064 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5065 AND 5066 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5067 */ 5068 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5069 PetscFunctionReturn(0); 5070 } 5071 /* destroy old objects */ 5072 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5073 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5074 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5075 /* Set Non-overlapping dimensions */ 5076 n_B = pcis->n_B; 5077 n_D = pcis->n - n_B; 5078 n_vertices = pcbddc->n_vertices; 5079 5080 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5081 5082 /* create auxiliary bitmask and allocate workspace */ 5083 if (!sub_schurs || !sub_schurs->reuse_solver) { 5084 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5085 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5086 for (i=0;i<n_vertices;i++) { 5087 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5088 } 5089 5090 for (i=0, n_R=0; i<pcis->n; i++) { 5091 if (!PetscBTLookup(bitmask,i)) { 5092 idx_R_local[n_R++] = i; 5093 } 5094 } 5095 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5096 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5097 5098 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5099 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5100 } 5101 5102 /* Block code */ 5103 vbs = 1; 5104 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5105 if (bs>1 && !(n_vertices%bs)) { 5106 PetscBool is_blocked = PETSC_TRUE; 5107 PetscInt *vary; 5108 if (!sub_schurs || !sub_schurs->reuse_solver) { 5109 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5110 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5111 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5112 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5113 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5114 for (i=0; i<pcis->n/bs; i++) { 5115 if (vary[i]!=0 && vary[i]!=bs) { 5116 is_blocked = PETSC_FALSE; 5117 break; 5118 } 5119 } 5120 ierr = PetscFree(vary);CHKERRQ(ierr); 5121 } else { 5122 /* Verify directly the R set */ 5123 for (i=0; i<n_R/bs; i++) { 5124 PetscInt j,node=idx_R_local[bs*i]; 5125 for (j=1; j<bs; j++) { 5126 if (node != idx_R_local[bs*i+j]-j) { 5127 is_blocked = PETSC_FALSE; 5128 break; 5129 } 5130 } 5131 } 5132 } 5133 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5134 vbs = bs; 5135 for (i=0;i<n_R/vbs;i++) { 5136 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5137 } 5138 } 5139 } 5140 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5141 if (sub_schurs && sub_schurs->reuse_solver) { 5142 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5143 5144 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5145 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5146 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5147 reuse_solver->is_R = pcbddc->is_R_local; 5148 } else { 5149 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5150 } 5151 5152 /* print some info if requested */ 5153 if (pcbddc->dbg_flag) { 5154 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5155 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5156 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5157 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5158 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5159 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5160 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5161 } 5162 5163 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5164 if (!sub_schurs || !sub_schurs->reuse_solver) { 5165 IS is_aux1,is_aux2; 5166 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5167 5168 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5169 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5170 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5171 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5172 for (i=0; i<n_D; i++) { 5173 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5174 } 5175 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5176 for (i=0, j=0; i<n_R; i++) { 5177 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5178 aux_array1[j++] = i; 5179 } 5180 } 5181 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5182 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5183 for (i=0, j=0; i<n_B; i++) { 5184 if (!PetscBTLookup(bitmask,is_indices[i])) { 5185 aux_array2[j++] = i; 5186 } 5187 } 5188 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5189 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5190 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5191 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5192 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5193 5194 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5195 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5196 for (i=0, j=0; i<n_R; i++) { 5197 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5198 aux_array1[j++] = i; 5199 } 5200 } 5201 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5202 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5203 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5204 } 5205 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5206 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5207 } else { 5208 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5209 IS tis; 5210 PetscInt schur_size; 5211 5212 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5213 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5214 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5215 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5216 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5217 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5218 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5219 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5220 } 5221 } 5222 PetscFunctionReturn(0); 5223 } 5224 5225 5226 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5227 { 5228 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5229 PC_IS *pcis = (PC_IS*)pc->data; 5230 PC pc_temp; 5231 Mat A_RR; 5232 MatReuse reuse; 5233 PetscScalar m_one = -1.0; 5234 PetscReal value; 5235 PetscInt n_D,n_R; 5236 PetscBool check_corr,issbaij; 5237 PetscErrorCode ierr; 5238 /* prefixes stuff */ 5239 char dir_prefix[256],neu_prefix[256],str_level[16]; 5240 size_t len; 5241 5242 PetscFunctionBegin; 5243 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5244 /* compute prefixes */ 5245 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5246 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5247 if (!pcbddc->current_level) { 5248 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5249 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5250 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5251 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5252 } else { 5253 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5254 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5255 len -= 15; /* remove "pc_bddc_coarse_" */ 5256 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5257 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5258 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5259 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5260 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5261 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5262 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5263 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5264 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5265 } 5266 5267 /* DIRICHLET PROBLEM */ 5268 if (dirichlet) { 5269 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5270 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5271 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5272 if (pcbddc->dbg_flag) { 5273 Mat A_IIn; 5274 5275 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5276 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5277 pcis->A_II = A_IIn; 5278 } 5279 } 5280 if (pcbddc->local_mat->symmetric_set) { 5281 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5282 } 5283 /* Matrix for Dirichlet problem is pcis->A_II */ 5284 n_D = pcis->n - pcis->n_B; 5285 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5286 void (*f)(void) = 0; 5287 5288 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5289 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5290 /* default */ 5291 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5292 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5293 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5294 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5295 if (issbaij) { 5296 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5297 } else { 5298 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5299 } 5300 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5301 /* Allow user's customization */ 5302 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5303 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5304 if (f && pcbddc->mat_graph->cloc) { 5305 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5306 const PetscInt *idxs; 5307 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5308 5309 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5310 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5311 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5312 for (i=0;i<nl;i++) { 5313 for (d=0;d<cdim;d++) { 5314 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5315 } 5316 } 5317 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5318 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5319 ierr = PetscFree(scoords);CHKERRQ(ierr); 5320 } 5321 } 5322 ierr = MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5323 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5324 if (sub_schurs && sub_schurs->reuse_solver) { 5325 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5326 5327 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5328 } 5329 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5330 if (!n_D) { 5331 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5332 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5333 } 5334 /* set ksp_D into pcis data */ 5335 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5336 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5337 pcis->ksp_D = pcbddc->ksp_D; 5338 } 5339 5340 /* NEUMANN PROBLEM */ 5341 A_RR = 0; 5342 if (neumann) { 5343 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5344 PetscInt ibs,mbs; 5345 PetscBool issbaij, reuse_neumann_solver; 5346 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5347 5348 reuse_neumann_solver = PETSC_FALSE; 5349 if (sub_schurs && sub_schurs->reuse_solver) { 5350 IS iP; 5351 5352 reuse_neumann_solver = PETSC_TRUE; 5353 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5354 if (iP) reuse_neumann_solver = PETSC_FALSE; 5355 } 5356 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5357 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5358 if (pcbddc->ksp_R) { /* already created ksp */ 5359 PetscInt nn_R; 5360 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5361 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5362 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5363 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5364 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5365 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5366 reuse = MAT_INITIAL_MATRIX; 5367 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5368 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5369 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5370 reuse = MAT_INITIAL_MATRIX; 5371 } else { /* safe to reuse the matrix */ 5372 reuse = MAT_REUSE_MATRIX; 5373 } 5374 } 5375 /* last check */ 5376 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5377 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5378 reuse = MAT_INITIAL_MATRIX; 5379 } 5380 } else { /* first time, so we need to create the matrix */ 5381 reuse = MAT_INITIAL_MATRIX; 5382 } 5383 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5384 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5385 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5386 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5387 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5388 if (matis->A == pcbddc->local_mat) { 5389 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5390 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5391 } else { 5392 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5393 } 5394 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5395 if (matis->A == pcbddc->local_mat) { 5396 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5397 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5398 } else { 5399 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5400 } 5401 } 5402 /* extract A_RR */ 5403 if (reuse_neumann_solver) { 5404 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5405 5406 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5407 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5408 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5409 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5410 } else { 5411 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5412 } 5413 } else { 5414 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5415 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5416 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5417 } 5418 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5419 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5420 } 5421 if (pcbddc->local_mat->symmetric_set) { 5422 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5423 } 5424 if (!pcbddc->ksp_R) { /* create object if not present */ 5425 void (*f)(void) = 0; 5426 5427 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5428 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5429 /* default */ 5430 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5431 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5432 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5433 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5434 if (issbaij) { 5435 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5436 } else { 5437 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5438 } 5439 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5440 /* Allow user's customization */ 5441 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5442 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5443 if (f && pcbddc->mat_graph->cloc) { 5444 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5445 const PetscInt *idxs; 5446 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5447 5448 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5449 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5450 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5451 for (i=0;i<nl;i++) { 5452 for (d=0;d<cdim;d++) { 5453 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5454 } 5455 } 5456 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5457 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5458 ierr = PetscFree(scoords);CHKERRQ(ierr); 5459 } 5460 } 5461 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5462 if (!n_R) { 5463 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5464 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5465 } 5466 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5467 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5468 /* Reuse solver if it is present */ 5469 if (reuse_neumann_solver) { 5470 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5471 5472 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5473 } 5474 } 5475 5476 if (pcbddc->dbg_flag) { 5477 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5478 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5479 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5480 } 5481 5482 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5483 check_corr = PETSC_FALSE; 5484 if (pcbddc->NullSpace_corr[0]) { 5485 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5486 } 5487 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5488 check_corr = PETSC_TRUE; 5489 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5490 } 5491 if (neumann && pcbddc->NullSpace_corr[2]) { 5492 check_corr = PETSC_TRUE; 5493 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5494 } 5495 /* check Dirichlet and Neumann solvers */ 5496 if (pcbddc->dbg_flag) { 5497 if (dirichlet) { /* Dirichlet */ 5498 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5499 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5500 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5501 ierr = KSPCheckSolve(pcbddc->ksp_D,pc,pcis->vec2_D);CHKERRQ(ierr); 5502 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5503 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5505 if (check_corr) { 5506 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5507 } 5508 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5509 } 5510 if (neumann) { /* Neumann */ 5511 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5512 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5513 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5514 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec2_R);CHKERRQ(ierr); 5515 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5516 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5518 if (check_corr) { 5519 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5520 } 5521 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5522 } 5523 } 5524 /* free Neumann problem's matrix */ 5525 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5526 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5527 PetscFunctionReturn(0); 5528 } 5529 5530 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5531 { 5532 PetscErrorCode ierr; 5533 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5534 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5535 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5536 5537 PetscFunctionBegin; 5538 if (!reuse_solver) { 5539 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5540 } 5541 if (!pcbddc->switch_static) { 5542 if (applytranspose && pcbddc->local_auxmat1) { 5543 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5544 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5545 } 5546 if (!reuse_solver) { 5547 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5548 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5549 } else { 5550 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5551 5552 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5553 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5554 } 5555 } else { 5556 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5557 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5558 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5559 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5560 if (applytranspose && pcbddc->local_auxmat1) { 5561 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5562 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5563 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5564 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5565 } 5566 } 5567 if (!reuse_solver || pcbddc->switch_static) { 5568 if (applytranspose) { 5569 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5570 } else { 5571 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5572 } 5573 ierr = KSPCheckSolve(pcbddc->ksp_R,pc,pcbddc->vec1_R);CHKERRQ(ierr); 5574 } else { 5575 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5576 5577 if (applytranspose) { 5578 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5579 } else { 5580 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5581 } 5582 } 5583 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5584 if (!pcbddc->switch_static) { 5585 if (!reuse_solver) { 5586 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5587 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5588 } else { 5589 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5590 5591 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5592 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5593 } 5594 if (!applytranspose && pcbddc->local_auxmat1) { 5595 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5596 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5597 } 5598 } else { 5599 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5600 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5601 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5602 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5603 if (!applytranspose && pcbddc->local_auxmat1) { 5604 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5605 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5606 } 5607 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5608 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5609 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5610 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5611 } 5612 PetscFunctionReturn(0); 5613 } 5614 5615 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5616 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5617 { 5618 PetscErrorCode ierr; 5619 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5620 PC_IS* pcis = (PC_IS*) (pc->data); 5621 const PetscScalar zero = 0.0; 5622 5623 PetscFunctionBegin; 5624 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5625 if (!pcbddc->benign_apply_coarse_only) { 5626 if (applytranspose) { 5627 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5628 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5629 } else { 5630 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5631 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5632 } 5633 } else { 5634 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5635 } 5636 5637 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5638 if (pcbddc->benign_n) { 5639 PetscScalar *array; 5640 PetscInt j; 5641 5642 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5643 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5644 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5645 } 5646 5647 /* start communications from local primal nodes to rhs of coarse solver */ 5648 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5649 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5650 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5651 5652 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5653 if (pcbddc->coarse_ksp) { 5654 Mat coarse_mat; 5655 Vec rhs,sol; 5656 MatNullSpace nullsp; 5657 PetscBool isbddc = PETSC_FALSE; 5658 5659 if (pcbddc->benign_have_null) { 5660 PC coarse_pc; 5661 5662 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5663 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5664 /* we need to propagate to coarser levels the need for a possible benign correction */ 5665 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5666 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5667 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5668 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5669 } 5670 } 5671 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5672 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5673 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5674 if (applytranspose) { 5675 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5676 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5677 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5678 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5679 if (nullsp) { 5680 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5681 } 5682 } else { 5683 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5684 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5685 PC coarse_pc; 5686 5687 if (nullsp) { 5688 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5689 } 5690 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5691 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5692 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5693 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5694 } else { 5695 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5696 ierr = KSPCheckSolve(pcbddc->coarse_ksp,pc,sol);CHKERRQ(ierr); 5697 if (nullsp) { 5698 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5699 } 5700 } 5701 } 5702 /* we don't need the benign correction at coarser levels anymore */ 5703 if (pcbddc->benign_have_null && isbddc) { 5704 PC coarse_pc; 5705 PC_BDDC* coarsepcbddc; 5706 5707 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5708 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5709 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5710 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5711 } 5712 } 5713 5714 /* Local solution on R nodes */ 5715 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5716 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5717 } 5718 /* communications from coarse sol to local primal nodes */ 5719 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5720 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5721 5722 /* Sum contributions from the two levels */ 5723 if (!pcbddc->benign_apply_coarse_only) { 5724 if (applytranspose) { 5725 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5726 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5727 } else { 5728 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5729 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5730 } 5731 /* store p0 */ 5732 if (pcbddc->benign_n) { 5733 PetscScalar *array; 5734 PetscInt j; 5735 5736 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5737 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5738 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5739 } 5740 } else { /* expand the coarse solution */ 5741 if (applytranspose) { 5742 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5743 } else { 5744 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5745 } 5746 } 5747 PetscFunctionReturn(0); 5748 } 5749 5750 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5751 { 5752 PetscErrorCode ierr; 5753 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5754 PetscScalar *array; 5755 Vec from,to; 5756 5757 PetscFunctionBegin; 5758 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5759 from = pcbddc->coarse_vec; 5760 to = pcbddc->vec1_P; 5761 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5762 Vec tvec; 5763 5764 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5765 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5766 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5767 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5768 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5769 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5770 } 5771 } else { /* from local to global -> put data in coarse right hand side */ 5772 from = pcbddc->vec1_P; 5773 to = pcbddc->coarse_vec; 5774 } 5775 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5776 PetscFunctionReturn(0); 5777 } 5778 5779 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5780 { 5781 PetscErrorCode ierr; 5782 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5783 PetscScalar *array; 5784 Vec from,to; 5785 5786 PetscFunctionBegin; 5787 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5788 from = pcbddc->coarse_vec; 5789 to = pcbddc->vec1_P; 5790 } else { /* from local to global -> put data in coarse right hand side */ 5791 from = pcbddc->vec1_P; 5792 to = pcbddc->coarse_vec; 5793 } 5794 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5795 if (smode == SCATTER_FORWARD) { 5796 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5797 Vec tvec; 5798 5799 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5800 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5801 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5802 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5803 } 5804 } else { 5805 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5806 ierr = VecResetArray(from);CHKERRQ(ierr); 5807 } 5808 } 5809 PetscFunctionReturn(0); 5810 } 5811 5812 /* uncomment for testing purposes */ 5813 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5814 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5815 { 5816 PetscErrorCode ierr; 5817 PC_IS* pcis = (PC_IS*)(pc->data); 5818 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5819 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5820 /* one and zero */ 5821 PetscScalar one=1.0,zero=0.0; 5822 /* space to store constraints and their local indices */ 5823 PetscScalar *constraints_data; 5824 PetscInt *constraints_idxs,*constraints_idxs_B; 5825 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5826 PetscInt *constraints_n; 5827 /* iterators */ 5828 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5829 /* BLAS integers */ 5830 PetscBLASInt lwork,lierr; 5831 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5832 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5833 /* reuse */ 5834 PetscInt olocal_primal_size,olocal_primal_size_cc; 5835 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5836 /* change of basis */ 5837 PetscBool qr_needed; 5838 PetscBT change_basis,qr_needed_idx; 5839 /* auxiliary stuff */ 5840 PetscInt *nnz,*is_indices; 5841 PetscInt ncc; 5842 /* some quantities */ 5843 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5844 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5845 PetscReal tol; /* tolerance for retaining eigenmodes */ 5846 5847 PetscFunctionBegin; 5848 tol = PetscSqrtReal(PETSC_SMALL); 5849 /* Destroy Mat objects computed previously */ 5850 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5851 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5852 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5853 /* save info on constraints from previous setup (if any) */ 5854 olocal_primal_size = pcbddc->local_primal_size; 5855 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5856 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5857 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5858 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5859 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5860 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5861 5862 if (!pcbddc->adaptive_selection) { 5863 IS ISForVertices,*ISForFaces,*ISForEdges; 5864 MatNullSpace nearnullsp; 5865 const Vec *nearnullvecs; 5866 Vec *localnearnullsp; 5867 PetscScalar *array; 5868 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5869 PetscBool nnsp_has_cnst; 5870 /* LAPACK working arrays for SVD or POD */ 5871 PetscBool skip_lapack,boolforchange; 5872 PetscScalar *work; 5873 PetscReal *singular_vals; 5874 #if defined(PETSC_USE_COMPLEX) 5875 PetscReal *rwork; 5876 #endif 5877 #if defined(PETSC_MISSING_LAPACK_GESVD) 5878 PetscScalar *temp_basis,*correlation_mat; 5879 #else 5880 PetscBLASInt dummy_int=1; 5881 PetscScalar dummy_scalar=1.; 5882 #endif 5883 5884 /* Get index sets for faces, edges and vertices from graph */ 5885 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5886 /* print some info */ 5887 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5888 PetscInt nv; 5889 5890 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5891 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5892 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5893 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5894 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5895 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5896 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5897 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5898 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5899 } 5900 5901 /* free unneeded index sets */ 5902 if (!pcbddc->use_vertices) { 5903 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5904 } 5905 if (!pcbddc->use_edges) { 5906 for (i=0;i<n_ISForEdges;i++) { 5907 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5908 } 5909 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5910 n_ISForEdges = 0; 5911 } 5912 if (!pcbddc->use_faces) { 5913 for (i=0;i<n_ISForFaces;i++) { 5914 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5915 } 5916 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5917 n_ISForFaces = 0; 5918 } 5919 5920 /* check if near null space is attached to global mat */ 5921 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5922 if (nearnullsp) { 5923 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5924 /* remove any stored info */ 5925 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5926 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5927 /* store information for BDDC solver reuse */ 5928 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5929 pcbddc->onearnullspace = nearnullsp; 5930 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5931 for (i=0;i<nnsp_size;i++) { 5932 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5933 } 5934 } else { /* if near null space is not provided BDDC uses constants by default */ 5935 nnsp_size = 0; 5936 nnsp_has_cnst = PETSC_TRUE; 5937 } 5938 /* get max number of constraints on a single cc */ 5939 max_constraints = nnsp_size; 5940 if (nnsp_has_cnst) max_constraints++; 5941 5942 /* 5943 Evaluate maximum storage size needed by the procedure 5944 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5945 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5946 There can be multiple constraints per connected component 5947 */ 5948 n_vertices = 0; 5949 if (ISForVertices) { 5950 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5951 } 5952 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5953 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5954 5955 total_counts = n_ISForFaces+n_ISForEdges; 5956 total_counts *= max_constraints; 5957 total_counts += n_vertices; 5958 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5959 5960 total_counts = 0; 5961 max_size_of_constraint = 0; 5962 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5963 IS used_is; 5964 if (i<n_ISForEdges) { 5965 used_is = ISForEdges[i]; 5966 } else { 5967 used_is = ISForFaces[i-n_ISForEdges]; 5968 } 5969 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5970 total_counts += j; 5971 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5972 } 5973 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5974 5975 /* get local part of global near null space vectors */ 5976 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5977 for (k=0;k<nnsp_size;k++) { 5978 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5979 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5980 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5981 } 5982 5983 /* whether or not to skip lapack calls */ 5984 skip_lapack = PETSC_TRUE; 5985 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5986 5987 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5988 if (!skip_lapack) { 5989 PetscScalar temp_work; 5990 5991 #if defined(PETSC_MISSING_LAPACK_GESVD) 5992 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5993 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5994 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5995 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5996 #if defined(PETSC_USE_COMPLEX) 5997 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5998 #endif 5999 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6000 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6001 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 6002 lwork = -1; 6003 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6004 #if !defined(PETSC_USE_COMPLEX) 6005 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 6006 #else 6007 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 6008 #endif 6009 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6010 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 6011 #else /* on missing GESVD */ 6012 /* SVD */ 6013 PetscInt max_n,min_n; 6014 max_n = max_size_of_constraint; 6015 min_n = max_constraints; 6016 if (max_size_of_constraint < max_constraints) { 6017 min_n = max_size_of_constraint; 6018 max_n = max_constraints; 6019 } 6020 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 6021 #if defined(PETSC_USE_COMPLEX) 6022 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 6023 #endif 6024 /* now we evaluate the optimal workspace using query with lwork=-1 */ 6025 lwork = -1; 6026 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 6027 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 6028 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 6029 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6030 #if !defined(PETSC_USE_COMPLEX) 6031 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 6032 #else 6033 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 6034 #endif 6035 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6036 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 6037 #endif /* on missing GESVD */ 6038 /* Allocate optimal workspace */ 6039 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 6040 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 6041 } 6042 /* Now we can loop on constraining sets */ 6043 total_counts = 0; 6044 constraints_idxs_ptr[0] = 0; 6045 constraints_data_ptr[0] = 0; 6046 /* vertices */ 6047 if (n_vertices) { 6048 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6049 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6050 for (i=0;i<n_vertices;i++) { 6051 constraints_n[total_counts] = 1; 6052 constraints_data[total_counts] = 1.0; 6053 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 6054 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 6055 total_counts++; 6056 } 6057 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6058 n_vertices = total_counts; 6059 } 6060 6061 /* edges and faces */ 6062 total_counts_cc = total_counts; 6063 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 6064 IS used_is; 6065 PetscBool idxs_copied = PETSC_FALSE; 6066 6067 if (ncc<n_ISForEdges) { 6068 used_is = ISForEdges[ncc]; 6069 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 6070 } else { 6071 used_is = ISForFaces[ncc-n_ISForEdges]; 6072 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6073 } 6074 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6075 6076 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6077 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6078 /* change of basis should not be performed on local periodic nodes */ 6079 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6080 if (nnsp_has_cnst) { 6081 PetscScalar quad_value; 6082 6083 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6084 idxs_copied = PETSC_TRUE; 6085 6086 if (!pcbddc->use_nnsp_true) { 6087 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6088 } else { 6089 quad_value = 1.0; 6090 } 6091 for (j=0;j<size_of_constraint;j++) { 6092 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6093 } 6094 temp_constraints++; 6095 total_counts++; 6096 } 6097 for (k=0;k<nnsp_size;k++) { 6098 PetscReal real_value; 6099 PetscScalar *ptr_to_data; 6100 6101 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6102 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6103 for (j=0;j<size_of_constraint;j++) { 6104 ptr_to_data[j] = array[is_indices[j]]; 6105 } 6106 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6107 /* check if array is null on the connected component */ 6108 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6109 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6110 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6111 temp_constraints++; 6112 total_counts++; 6113 if (!idxs_copied) { 6114 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6115 idxs_copied = PETSC_TRUE; 6116 } 6117 } 6118 } 6119 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6120 valid_constraints = temp_constraints; 6121 if (!pcbddc->use_nnsp_true && temp_constraints) { 6122 if (temp_constraints == 1) { /* just normalize the constraint */ 6123 PetscScalar norm,*ptr_to_data; 6124 6125 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6126 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6127 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6128 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6129 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6130 } else { /* perform SVD */ 6131 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6132 6133 #if defined(PETSC_MISSING_LAPACK_GESVD) 6134 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6135 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6136 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6137 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6138 from that computed using LAPACKgesvd 6139 -> This is due to a different computation of eigenvectors in LAPACKheev 6140 -> The quality of the POD-computed basis will be the same */ 6141 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6142 /* Store upper triangular part of correlation matrix */ 6143 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6144 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6145 for (j=0;j<temp_constraints;j++) { 6146 for (k=0;k<j+1;k++) { 6147 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6148 } 6149 } 6150 /* compute eigenvalues and eigenvectors of correlation matrix */ 6151 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6152 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6153 #if !defined(PETSC_USE_COMPLEX) 6154 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6155 #else 6156 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6157 #endif 6158 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6159 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6160 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6161 j = 0; 6162 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6163 total_counts = total_counts-j; 6164 valid_constraints = temp_constraints-j; 6165 /* scale and copy POD basis into used quadrature memory */ 6166 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6167 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6168 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6169 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6170 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6171 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6172 if (j<temp_constraints) { 6173 PetscInt ii; 6174 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6175 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6176 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6177 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6178 for (k=0;k<temp_constraints-j;k++) { 6179 for (ii=0;ii<size_of_constraint;ii++) { 6180 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6181 } 6182 } 6183 } 6184 #else /* on missing GESVD */ 6185 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6186 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6187 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6188 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6189 #if !defined(PETSC_USE_COMPLEX) 6190 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6191 #else 6192 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6193 #endif 6194 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6195 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6196 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6197 k = temp_constraints; 6198 if (k > size_of_constraint) k = size_of_constraint; 6199 j = 0; 6200 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6201 valid_constraints = k-j; 6202 total_counts = total_counts-temp_constraints+valid_constraints; 6203 #endif /* on missing GESVD */ 6204 } 6205 } 6206 /* update pointers information */ 6207 if (valid_constraints) { 6208 constraints_n[total_counts_cc] = valid_constraints; 6209 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6210 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6211 /* set change_of_basis flag */ 6212 if (boolforchange) { 6213 PetscBTSet(change_basis,total_counts_cc); 6214 } 6215 total_counts_cc++; 6216 } 6217 } 6218 /* free workspace */ 6219 if (!skip_lapack) { 6220 ierr = PetscFree(work);CHKERRQ(ierr); 6221 #if defined(PETSC_USE_COMPLEX) 6222 ierr = PetscFree(rwork);CHKERRQ(ierr); 6223 #endif 6224 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6225 #if defined(PETSC_MISSING_LAPACK_GESVD) 6226 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6227 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6228 #endif 6229 } 6230 for (k=0;k<nnsp_size;k++) { 6231 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6232 } 6233 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6234 /* free index sets of faces, edges and vertices */ 6235 for (i=0;i<n_ISForFaces;i++) { 6236 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6237 } 6238 if (n_ISForFaces) { 6239 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6240 } 6241 for (i=0;i<n_ISForEdges;i++) { 6242 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6243 } 6244 if (n_ISForEdges) { 6245 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6246 } 6247 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6248 } else { 6249 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6250 6251 total_counts = 0; 6252 n_vertices = 0; 6253 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6254 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6255 } 6256 max_constraints = 0; 6257 total_counts_cc = 0; 6258 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6259 total_counts += pcbddc->adaptive_constraints_n[i]; 6260 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6261 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6262 } 6263 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6264 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6265 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6266 constraints_data = pcbddc->adaptive_constraints_data; 6267 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6268 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6269 total_counts_cc = 0; 6270 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6271 if (pcbddc->adaptive_constraints_n[i]) { 6272 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6273 } 6274 } 6275 6276 max_size_of_constraint = 0; 6277 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6278 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6279 /* Change of basis */ 6280 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6281 if (pcbddc->use_change_of_basis) { 6282 for (i=0;i<sub_schurs->n_subs;i++) { 6283 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6284 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6285 } 6286 } 6287 } 6288 } 6289 pcbddc->local_primal_size = total_counts; 6290 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6291 6292 /* map constraints_idxs in boundary numbering */ 6293 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6294 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6295 6296 /* Create constraint matrix */ 6297 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6298 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6299 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6300 6301 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6302 /* determine if a QR strategy is needed for change of basis */ 6303 qr_needed = pcbddc->use_qr_single; 6304 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6305 total_primal_vertices=0; 6306 pcbddc->local_primal_size_cc = 0; 6307 for (i=0;i<total_counts_cc;i++) { 6308 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6309 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6310 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6311 pcbddc->local_primal_size_cc += 1; 6312 } else if (PetscBTLookup(change_basis,i)) { 6313 for (k=0;k<constraints_n[i];k++) { 6314 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6315 } 6316 pcbddc->local_primal_size_cc += constraints_n[i]; 6317 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6318 PetscBTSet(qr_needed_idx,i); 6319 qr_needed = PETSC_TRUE; 6320 } 6321 } else { 6322 pcbddc->local_primal_size_cc += 1; 6323 } 6324 } 6325 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6326 pcbddc->n_vertices = total_primal_vertices; 6327 /* permute indices in order to have a sorted set of vertices */ 6328 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6329 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6330 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6331 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6332 6333 /* nonzero structure of constraint matrix */ 6334 /* and get reference dof for local constraints */ 6335 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6336 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6337 6338 j = total_primal_vertices; 6339 total_counts = total_primal_vertices; 6340 cum = total_primal_vertices; 6341 for (i=n_vertices;i<total_counts_cc;i++) { 6342 if (!PetscBTLookup(change_basis,i)) { 6343 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6344 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6345 cum++; 6346 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6347 for (k=0;k<constraints_n[i];k++) { 6348 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6349 nnz[j+k] = size_of_constraint; 6350 } 6351 j += constraints_n[i]; 6352 } 6353 } 6354 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6355 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6356 ierr = PetscFree(nnz);CHKERRQ(ierr); 6357 6358 /* set values in constraint matrix */ 6359 for (i=0;i<total_primal_vertices;i++) { 6360 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6361 } 6362 total_counts = total_primal_vertices; 6363 for (i=n_vertices;i<total_counts_cc;i++) { 6364 if (!PetscBTLookup(change_basis,i)) { 6365 PetscInt *cols; 6366 6367 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6368 cols = constraints_idxs+constraints_idxs_ptr[i]; 6369 for (k=0;k<constraints_n[i];k++) { 6370 PetscInt row = total_counts+k; 6371 PetscScalar *vals; 6372 6373 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6374 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6375 } 6376 total_counts += constraints_n[i]; 6377 } 6378 } 6379 /* assembling */ 6380 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6381 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6382 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6383 6384 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6385 if (pcbddc->use_change_of_basis) { 6386 /* dual and primal dofs on a single cc */ 6387 PetscInt dual_dofs,primal_dofs; 6388 /* working stuff for GEQRF */ 6389 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6390 PetscBLASInt lqr_work; 6391 /* working stuff for UNGQR */ 6392 PetscScalar *gqr_work = NULL,lgqr_work_t; 6393 PetscBLASInt lgqr_work; 6394 /* working stuff for TRTRS */ 6395 PetscScalar *trs_rhs = NULL; 6396 PetscBLASInt Blas_NRHS; 6397 /* pointers for values insertion into change of basis matrix */ 6398 PetscInt *start_rows,*start_cols; 6399 PetscScalar *start_vals; 6400 /* working stuff for values insertion */ 6401 PetscBT is_primal; 6402 PetscInt *aux_primal_numbering_B; 6403 /* matrix sizes */ 6404 PetscInt global_size,local_size; 6405 /* temporary change of basis */ 6406 Mat localChangeOfBasisMatrix; 6407 /* extra space for debugging */ 6408 PetscScalar *dbg_work = NULL; 6409 6410 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6411 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6412 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6413 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6414 /* nonzeros for local mat */ 6415 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6416 if (!pcbddc->benign_change || pcbddc->fake_change) { 6417 for (i=0;i<pcis->n;i++) nnz[i]=1; 6418 } else { 6419 const PetscInt *ii; 6420 PetscInt n; 6421 PetscBool flg_row; 6422 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6423 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6424 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6425 } 6426 for (i=n_vertices;i<total_counts_cc;i++) { 6427 if (PetscBTLookup(change_basis,i)) { 6428 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6429 if (PetscBTLookup(qr_needed_idx,i)) { 6430 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6431 } else { 6432 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6433 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6434 } 6435 } 6436 } 6437 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6438 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6439 ierr = PetscFree(nnz);CHKERRQ(ierr); 6440 /* Set interior change in the matrix */ 6441 if (!pcbddc->benign_change || pcbddc->fake_change) { 6442 for (i=0;i<pcis->n;i++) { 6443 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6444 } 6445 } else { 6446 const PetscInt *ii,*jj; 6447 PetscScalar *aa; 6448 PetscInt n; 6449 PetscBool flg_row; 6450 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6451 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6452 for (i=0;i<n;i++) { 6453 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6454 } 6455 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6456 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6457 } 6458 6459 if (pcbddc->dbg_flag) { 6460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6461 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6462 } 6463 6464 6465 /* Now we loop on the constraints which need a change of basis */ 6466 /* 6467 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6468 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6469 6470 Basic blocks of change of basis matrix T computed by 6471 6472 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6473 6474 | 1 0 ... 0 s_1/S | 6475 | 0 1 ... 0 s_2/S | 6476 | ... | 6477 | 0 ... 1 s_{n-1}/S | 6478 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6479 6480 with S = \sum_{i=1}^n s_i^2 6481 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6482 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6483 6484 - QR decomposition of constraints otherwise 6485 */ 6486 if (qr_needed && max_size_of_constraint) { 6487 /* space to store Q */ 6488 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6489 /* array to store scaling factors for reflectors */ 6490 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6491 /* first we issue queries for optimal work */ 6492 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6493 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6494 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6495 lqr_work = -1; 6496 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6498 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6499 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6500 lgqr_work = -1; 6501 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6502 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6503 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6504 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6505 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6506 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6507 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6508 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6509 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6510 /* array to store rhs and solution of triangular solver */ 6511 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6512 /* allocating workspace for check */ 6513 if (pcbddc->dbg_flag) { 6514 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6515 } 6516 } 6517 /* array to store whether a node is primal or not */ 6518 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6519 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6520 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6521 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6522 for (i=0;i<total_primal_vertices;i++) { 6523 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6524 } 6525 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6526 6527 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6528 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6529 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6530 if (PetscBTLookup(change_basis,total_counts)) { 6531 /* get constraint info */ 6532 primal_dofs = constraints_n[total_counts]; 6533 dual_dofs = size_of_constraint-primal_dofs; 6534 6535 if (pcbddc->dbg_flag) { 6536 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6537 } 6538 6539 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6540 6541 /* copy quadrature constraints for change of basis check */ 6542 if (pcbddc->dbg_flag) { 6543 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6544 } 6545 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6546 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6547 6548 /* compute QR decomposition of constraints */ 6549 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6550 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6551 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6552 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6553 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6554 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6555 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6556 6557 /* explictly compute R^-T */ 6558 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6559 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6560 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6561 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6563 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6564 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6565 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6566 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6567 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6568 6569 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6570 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6571 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6572 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6573 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6574 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6575 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6576 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6577 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6578 6579 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6580 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6581 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6582 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6583 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6584 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6585 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6586 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6587 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6588 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6589 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6590 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6591 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6592 6593 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6594 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6595 /* insert cols for primal dofs */ 6596 for (j=0;j<primal_dofs;j++) { 6597 start_vals = &qr_basis[j*size_of_constraint]; 6598 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6599 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6600 } 6601 /* insert cols for dual dofs */ 6602 for (j=0,k=0;j<dual_dofs;k++) { 6603 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6604 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6605 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6606 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6607 j++; 6608 } 6609 } 6610 6611 /* check change of basis */ 6612 if (pcbddc->dbg_flag) { 6613 PetscInt ii,jj; 6614 PetscBool valid_qr=PETSC_TRUE; 6615 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6616 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6617 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6618 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6619 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6620 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6621 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6622 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6623 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6624 for (jj=0;jj<size_of_constraint;jj++) { 6625 for (ii=0;ii<primal_dofs;ii++) { 6626 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6627 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6628 } 6629 } 6630 if (!valid_qr) { 6631 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6632 for (jj=0;jj<size_of_constraint;jj++) { 6633 for (ii=0;ii<primal_dofs;ii++) { 6634 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6635 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6636 } 6637 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6638 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6639 } 6640 } 6641 } 6642 } else { 6643 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6644 } 6645 } 6646 } else { /* simple transformation block */ 6647 PetscInt row,col; 6648 PetscScalar val,norm; 6649 6650 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6651 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6652 for (j=0;j<size_of_constraint;j++) { 6653 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6654 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6655 if (!PetscBTLookup(is_primal,row_B)) { 6656 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6657 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6658 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6659 } else { 6660 for (k=0;k<size_of_constraint;k++) { 6661 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6662 if (row != col) { 6663 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6664 } else { 6665 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6666 } 6667 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6668 } 6669 } 6670 } 6671 if (pcbddc->dbg_flag) { 6672 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6673 } 6674 } 6675 } else { 6676 if (pcbddc->dbg_flag) { 6677 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6678 } 6679 } 6680 } 6681 6682 /* free workspace */ 6683 if (qr_needed) { 6684 if (pcbddc->dbg_flag) { 6685 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6686 } 6687 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6688 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6689 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6690 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6691 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6692 } 6693 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6694 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6695 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6696 6697 /* assembling of global change of variable */ 6698 if (!pcbddc->fake_change) { 6699 Mat tmat; 6700 PetscInt bs; 6701 6702 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6703 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6704 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6705 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6706 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6707 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6708 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6709 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6710 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6711 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6712 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6713 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6714 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6715 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6716 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6717 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6718 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6719 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6720 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6721 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6722 6723 /* check */ 6724 if (pcbddc->dbg_flag) { 6725 PetscReal error; 6726 Vec x,x_change; 6727 6728 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6729 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6730 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6731 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6732 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6733 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6734 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6735 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6736 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6737 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6738 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6739 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6740 if (error > PETSC_SMALL) { 6741 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6742 } 6743 ierr = VecDestroy(&x);CHKERRQ(ierr); 6744 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6745 } 6746 /* adapt sub_schurs computed (if any) */ 6747 if (pcbddc->use_deluxe_scaling) { 6748 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6749 6750 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6751 if (sub_schurs && sub_schurs->S_Ej_all) { 6752 Mat S_new,tmat; 6753 IS is_all_N,is_V_Sall = NULL; 6754 6755 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6756 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6757 if (pcbddc->deluxe_zerorows) { 6758 ISLocalToGlobalMapping NtoSall; 6759 IS is_V; 6760 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6761 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6762 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6763 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6764 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6765 } 6766 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6767 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6768 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6769 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6770 if (pcbddc->deluxe_zerorows) { 6771 const PetscScalar *array; 6772 const PetscInt *idxs_V,*idxs_all; 6773 PetscInt i,n_V; 6774 6775 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6776 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6777 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6778 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6779 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6780 for (i=0;i<n_V;i++) { 6781 PetscScalar val; 6782 PetscInt idx; 6783 6784 idx = idxs_V[i]; 6785 val = array[idxs_all[idxs_V[i]]]; 6786 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6787 } 6788 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6789 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6790 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6791 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6792 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6793 } 6794 sub_schurs->S_Ej_all = S_new; 6795 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6796 if (sub_schurs->sum_S_Ej_all) { 6797 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6798 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6799 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6800 if (pcbddc->deluxe_zerorows) { 6801 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6802 } 6803 sub_schurs->sum_S_Ej_all = S_new; 6804 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6805 } 6806 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6807 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6808 } 6809 /* destroy any change of basis context in sub_schurs */ 6810 if (sub_schurs && sub_schurs->change) { 6811 PetscInt i; 6812 6813 for (i=0;i<sub_schurs->n_subs;i++) { 6814 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6815 } 6816 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6817 } 6818 } 6819 if (pcbddc->switch_static) { /* need to save the local change */ 6820 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6821 } else { 6822 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6823 } 6824 /* determine if any process has changed the pressures locally */ 6825 pcbddc->change_interior = pcbddc->benign_have_null; 6826 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6827 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6828 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6829 pcbddc->use_qr_single = qr_needed; 6830 } 6831 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6832 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6833 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6834 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6835 } else { 6836 Mat benign_global = NULL; 6837 if (pcbddc->benign_have_null) { 6838 Mat M; 6839 6840 pcbddc->change_interior = PETSC_TRUE; 6841 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 6842 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 6843 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 6844 if (pcbddc->benign_change) { 6845 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6846 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6847 } else { 6848 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 6849 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 6850 } 6851 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 6852 ierr = MatDestroy(&M);CHKERRQ(ierr); 6853 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6854 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6855 } 6856 if (pcbddc->user_ChangeOfBasisMatrix) { 6857 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6858 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6859 } else if (pcbddc->benign_have_null) { 6860 pcbddc->ChangeOfBasisMatrix = benign_global; 6861 } 6862 } 6863 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6864 IS is_global; 6865 const PetscInt *gidxs; 6866 6867 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6868 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6869 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6870 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6871 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6872 } 6873 } 6874 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6875 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6876 } 6877 6878 if (!pcbddc->fake_change) { 6879 /* add pressure dofs to set of primal nodes for numbering purposes */ 6880 for (i=0;i<pcbddc->benign_n;i++) { 6881 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6882 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6883 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6884 pcbddc->local_primal_size_cc++; 6885 pcbddc->local_primal_size++; 6886 } 6887 6888 /* check if a new primal space has been introduced (also take into account benign trick) */ 6889 pcbddc->new_primal_space_local = PETSC_TRUE; 6890 if (olocal_primal_size == pcbddc->local_primal_size) { 6891 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6892 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6893 if (!pcbddc->new_primal_space_local) { 6894 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6895 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6896 } 6897 } 6898 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6899 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6900 } 6901 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6902 6903 /* flush dbg viewer */ 6904 if (pcbddc->dbg_flag) { 6905 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6906 } 6907 6908 /* free workspace */ 6909 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6910 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6911 if (!pcbddc->adaptive_selection) { 6912 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6913 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6914 } else { 6915 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6916 pcbddc->adaptive_constraints_idxs_ptr, 6917 pcbddc->adaptive_constraints_data_ptr, 6918 pcbddc->adaptive_constraints_idxs, 6919 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6920 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6921 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6922 } 6923 PetscFunctionReturn(0); 6924 } 6925 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6926 6927 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6928 { 6929 ISLocalToGlobalMapping map; 6930 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6931 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6932 PetscInt i,N; 6933 PetscBool rcsr = PETSC_FALSE; 6934 PetscErrorCode ierr; 6935 6936 PetscFunctionBegin; 6937 if (pcbddc->recompute_topography) { 6938 pcbddc->graphanalyzed = PETSC_FALSE; 6939 /* Reset previously computed graph */ 6940 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6941 /* Init local Graph struct */ 6942 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6943 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6944 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6945 6946 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6947 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6948 } 6949 /* Check validity of the csr graph passed in by the user */ 6950 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6951 6952 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6953 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6954 PetscInt *xadj,*adjncy; 6955 PetscInt nvtxs; 6956 PetscBool flg_row=PETSC_FALSE; 6957 6958 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6959 if (flg_row) { 6960 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6961 pcbddc->computed_rowadj = PETSC_TRUE; 6962 } 6963 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6964 rcsr = PETSC_TRUE; 6965 } 6966 if (pcbddc->dbg_flag) { 6967 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6968 } 6969 6970 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6971 PetscReal *lcoords; 6972 PetscInt n; 6973 MPI_Datatype dimrealtype; 6974 6975 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6976 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6977 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6978 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6979 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6980 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6981 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6982 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6983 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6984 6985 pcbddc->mat_graph->coords = lcoords; 6986 pcbddc->mat_graph->cloc = PETSC_TRUE; 6987 pcbddc->mat_graph->cnloc = n; 6988 } 6989 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6990 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6991 6992 /* Setup of Graph */ 6993 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6994 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6995 6996 /* attach info on disconnected subdomains if present */ 6997 if (pcbddc->n_local_subs) { 6998 PetscInt *local_subs; 6999 7000 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 7001 for (i=0;i<pcbddc->n_local_subs;i++) { 7002 const PetscInt *idxs; 7003 PetscInt nl,j; 7004 7005 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 7006 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7007 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 7008 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 7009 } 7010 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 7011 pcbddc->mat_graph->local_subs = local_subs; 7012 } 7013 } 7014 7015 if (!pcbddc->graphanalyzed) { 7016 /* Graph's connected components analysis */ 7017 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 7018 pcbddc->graphanalyzed = PETSC_TRUE; 7019 } 7020 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 7021 PetscFunctionReturn(0); 7022 } 7023 7024 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 7025 { 7026 PetscInt i,j; 7027 PetscScalar *alphas; 7028 PetscErrorCode ierr; 7029 7030 PetscFunctionBegin; 7031 if (!n) PetscFunctionReturn(0); 7032 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 7033 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 7034 for (i=1;i<n;i++) { 7035 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 7036 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 7037 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 7038 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 7039 } 7040 ierr = PetscFree(alphas);CHKERRQ(ierr); 7041 PetscFunctionReturn(0); 7042 } 7043 7044 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7045 { 7046 Mat A; 7047 PetscInt n_neighs,*neighs,*n_shared,**shared; 7048 PetscMPIInt size,rank,color; 7049 PetscInt *xadj,*adjncy; 7050 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7051 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7052 PetscInt void_procs,*procs_candidates = NULL; 7053 PetscInt xadj_count,*count; 7054 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7055 PetscSubcomm psubcomm; 7056 MPI_Comm subcomm; 7057 PetscErrorCode ierr; 7058 7059 PetscFunctionBegin; 7060 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7061 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7062 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7063 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7064 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7065 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 7066 7067 if (have_void) *have_void = PETSC_FALSE; 7068 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7069 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7070 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7071 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7072 im_active = !!n; 7073 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7074 void_procs = size - active_procs; 7075 /* get ranks of of non-active processes in mat communicator */ 7076 if (void_procs) { 7077 PetscInt ncand; 7078 7079 if (have_void) *have_void = PETSC_TRUE; 7080 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7081 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7082 for (i=0,ncand=0;i<size;i++) { 7083 if (!procs_candidates[i]) { 7084 procs_candidates[ncand++] = i; 7085 } 7086 } 7087 /* force n_subdomains to be not greater that the number of non-active processes */ 7088 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7089 } 7090 7091 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7092 number of subdomains requested 1 -> send to master or first candidate in voids */ 7093 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7094 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7095 PetscInt issize,isidx,dest; 7096 if (*n_subdomains == 1) dest = 0; 7097 else dest = rank; 7098 if (im_active) { 7099 issize = 1; 7100 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7101 isidx = procs_candidates[dest]; 7102 } else { 7103 isidx = dest; 7104 } 7105 } else { 7106 issize = 0; 7107 isidx = -1; 7108 } 7109 if (*n_subdomains != 1) *n_subdomains = active_procs; 7110 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7111 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7112 PetscFunctionReturn(0); 7113 } 7114 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7115 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7116 threshold = PetscMax(threshold,2); 7117 7118 /* Get info on mapping */ 7119 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7120 7121 /* build local CSR graph of subdomains' connectivity */ 7122 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7123 xadj[0] = 0; 7124 xadj[1] = PetscMax(n_neighs-1,0); 7125 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7126 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7127 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7128 for (i=1;i<n_neighs;i++) 7129 for (j=0;j<n_shared[i];j++) 7130 count[shared[i][j]] += 1; 7131 7132 xadj_count = 0; 7133 for (i=1;i<n_neighs;i++) { 7134 for (j=0;j<n_shared[i];j++) { 7135 if (count[shared[i][j]] < threshold) { 7136 adjncy[xadj_count] = neighs[i]; 7137 adjncy_wgt[xadj_count] = n_shared[i]; 7138 xadj_count++; 7139 break; 7140 } 7141 } 7142 } 7143 xadj[1] = xadj_count; 7144 ierr = PetscFree(count);CHKERRQ(ierr); 7145 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7146 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7147 7148 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7149 7150 /* Restrict work on active processes only */ 7151 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7152 if (void_procs) { 7153 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7154 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7155 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7156 subcomm = PetscSubcommChild(psubcomm); 7157 } else { 7158 psubcomm = NULL; 7159 subcomm = PetscObjectComm((PetscObject)mat); 7160 } 7161 7162 v_wgt = NULL; 7163 if (!color) { 7164 ierr = PetscFree(xadj);CHKERRQ(ierr); 7165 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7166 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7167 } else { 7168 Mat subdomain_adj; 7169 IS new_ranks,new_ranks_contig; 7170 MatPartitioning partitioner; 7171 PetscInt rstart=0,rend=0; 7172 PetscInt *is_indices,*oldranks; 7173 PetscMPIInt size; 7174 PetscBool aggregate; 7175 7176 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7177 if (void_procs) { 7178 PetscInt prank = rank; 7179 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7180 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7181 for (i=0;i<xadj[1];i++) { 7182 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7183 } 7184 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7185 } else { 7186 oldranks = NULL; 7187 } 7188 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7189 if (aggregate) { /* TODO: all this part could be made more efficient */ 7190 PetscInt lrows,row,ncols,*cols; 7191 PetscMPIInt nrank; 7192 PetscScalar *vals; 7193 7194 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7195 lrows = 0; 7196 if (nrank<redprocs) { 7197 lrows = size/redprocs; 7198 if (nrank<size%redprocs) lrows++; 7199 } 7200 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7201 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7202 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7203 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7204 row = nrank; 7205 ncols = xadj[1]-xadj[0]; 7206 cols = adjncy; 7207 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7208 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7209 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7210 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7211 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7212 ierr = PetscFree(xadj);CHKERRQ(ierr); 7213 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7214 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7215 ierr = PetscFree(vals);CHKERRQ(ierr); 7216 if (use_vwgt) { 7217 Vec v; 7218 const PetscScalar *array; 7219 PetscInt nl; 7220 7221 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7222 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7223 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7224 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7225 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7226 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7227 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7228 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7229 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7230 ierr = VecDestroy(&v);CHKERRQ(ierr); 7231 } 7232 } else { 7233 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7234 if (use_vwgt) { 7235 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7236 v_wgt[0] = n; 7237 } 7238 } 7239 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7240 7241 /* Partition */ 7242 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7243 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7244 if (v_wgt) { 7245 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7246 } 7247 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7248 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7249 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7250 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7251 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7252 7253 /* renumber new_ranks to avoid "holes" in new set of processors */ 7254 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7255 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7256 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7257 if (!aggregate) { 7258 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7259 #if defined(PETSC_USE_DEBUG) 7260 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7261 #endif 7262 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7263 } else if (oldranks) { 7264 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7265 } else { 7266 ranks_send_to_idx[0] = is_indices[0]; 7267 } 7268 } else { 7269 PetscInt idx = 0; 7270 PetscMPIInt tag; 7271 MPI_Request *reqs; 7272 7273 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7274 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7275 for (i=rstart;i<rend;i++) { 7276 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7277 } 7278 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7279 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7280 ierr = PetscFree(reqs);CHKERRQ(ierr); 7281 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7282 #if defined(PETSC_USE_DEBUG) 7283 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7284 #endif 7285 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7286 } else if (oldranks) { 7287 ranks_send_to_idx[0] = oldranks[idx]; 7288 } else { 7289 ranks_send_to_idx[0] = idx; 7290 } 7291 } 7292 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7293 /* clean up */ 7294 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7295 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7296 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7297 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7298 } 7299 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7300 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7301 7302 /* assemble parallel IS for sends */ 7303 i = 1; 7304 if (!color) i=0; 7305 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7306 PetscFunctionReturn(0); 7307 } 7308 7309 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7310 7311 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7312 { 7313 Mat local_mat; 7314 IS is_sends_internal; 7315 PetscInt rows,cols,new_local_rows; 7316 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7317 PetscBool ismatis,isdense,newisdense,destroy_mat; 7318 ISLocalToGlobalMapping l2gmap; 7319 PetscInt* l2gmap_indices; 7320 const PetscInt* is_indices; 7321 MatType new_local_type; 7322 /* buffers */ 7323 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7324 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7325 PetscInt *recv_buffer_idxs_local; 7326 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7327 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7328 /* MPI */ 7329 MPI_Comm comm,comm_n; 7330 PetscSubcomm subcomm; 7331 PetscMPIInt n_sends,n_recvs,size; 7332 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7333 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7334 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7335 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7336 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7337 PetscErrorCode ierr; 7338 7339 PetscFunctionBegin; 7340 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7341 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7342 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7343 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7344 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7345 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7346 PetscValidLogicalCollectiveBool(mat,reuse,6); 7347 PetscValidLogicalCollectiveInt(mat,nis,8); 7348 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7349 if (nvecs) { 7350 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7351 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7352 } 7353 /* further checks */ 7354 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7355 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7356 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7357 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7358 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7359 if (reuse && *mat_n) { 7360 PetscInt mrows,mcols,mnrows,mncols; 7361 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7362 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7363 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7364 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7365 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7366 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7367 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7368 } 7369 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7370 PetscValidLogicalCollectiveInt(mat,bs,0); 7371 7372 /* prepare IS for sending if not provided */ 7373 if (!is_sends) { 7374 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7375 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7376 } else { 7377 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7378 is_sends_internal = is_sends; 7379 } 7380 7381 /* get comm */ 7382 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7383 7384 /* compute number of sends */ 7385 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7386 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7387 7388 /* compute number of receives */ 7389 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7390 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7391 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 7392 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7393 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7394 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7395 ierr = PetscFree(iflags);CHKERRQ(ierr); 7396 7397 /* restrict comm if requested */ 7398 subcomm = 0; 7399 destroy_mat = PETSC_FALSE; 7400 if (restrict_comm) { 7401 PetscMPIInt color,subcommsize; 7402 7403 color = 0; 7404 if (restrict_full) { 7405 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7406 } else { 7407 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7408 } 7409 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7410 subcommsize = size - subcommsize; 7411 /* check if reuse has been requested */ 7412 if (reuse) { 7413 if (*mat_n) { 7414 PetscMPIInt subcommsize2; 7415 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7416 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7417 comm_n = PetscObjectComm((PetscObject)*mat_n); 7418 } else { 7419 comm_n = PETSC_COMM_SELF; 7420 } 7421 } else { /* MAT_INITIAL_MATRIX */ 7422 PetscMPIInt rank; 7423 7424 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7425 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7426 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7427 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7428 comm_n = PetscSubcommChild(subcomm); 7429 } 7430 /* flag to destroy *mat_n if not significative */ 7431 if (color) destroy_mat = PETSC_TRUE; 7432 } else { 7433 comm_n = comm; 7434 } 7435 7436 /* prepare send/receive buffers */ 7437 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7438 ierr = PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7439 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7440 ierr = PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));CHKERRQ(ierr); 7441 if (nis) { 7442 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7443 } 7444 7445 /* Get data from local matrices */ 7446 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7447 /* TODO: See below some guidelines on how to prepare the local buffers */ 7448 /* 7449 send_buffer_vals should contain the raw values of the local matrix 7450 send_buffer_idxs should contain: 7451 - MatType_PRIVATE type 7452 - PetscInt size_of_l2gmap 7453 - PetscInt global_row_indices[size_of_l2gmap] 7454 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7455 */ 7456 else { 7457 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7458 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7459 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7460 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7461 send_buffer_idxs[1] = i; 7462 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7463 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7464 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7465 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7466 for (i=0;i<n_sends;i++) { 7467 ilengths_vals[is_indices[i]] = len*len; 7468 ilengths_idxs[is_indices[i]] = len+2; 7469 } 7470 } 7471 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7472 /* additional is (if any) */ 7473 if (nis) { 7474 PetscMPIInt psum; 7475 PetscInt j; 7476 for (j=0,psum=0;j<nis;j++) { 7477 PetscInt plen; 7478 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7479 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7480 psum += len+1; /* indices + lenght */ 7481 } 7482 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7483 for (j=0,psum=0;j<nis;j++) { 7484 PetscInt plen; 7485 const PetscInt *is_array_idxs; 7486 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7487 send_buffer_idxs_is[psum] = plen; 7488 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7489 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7490 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7491 psum += plen+1; /* indices + lenght */ 7492 } 7493 for (i=0;i<n_sends;i++) { 7494 ilengths_idxs_is[is_indices[i]] = psum; 7495 } 7496 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7497 } 7498 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7499 7500 buf_size_idxs = 0; 7501 buf_size_vals = 0; 7502 buf_size_idxs_is = 0; 7503 buf_size_vecs = 0; 7504 for (i=0;i<n_recvs;i++) { 7505 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7506 buf_size_vals += (PetscInt)olengths_vals[i]; 7507 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7508 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7509 } 7510 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7511 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7512 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7513 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7514 7515 /* get new tags for clean communications */ 7516 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7517 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7518 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7519 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7520 7521 /* allocate for requests */ 7522 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7523 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7524 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7525 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7526 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7527 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7528 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7529 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7530 7531 /* communications */ 7532 ptr_idxs = recv_buffer_idxs; 7533 ptr_vals = recv_buffer_vals; 7534 ptr_idxs_is = recv_buffer_idxs_is; 7535 ptr_vecs = recv_buffer_vecs; 7536 for (i=0;i<n_recvs;i++) { 7537 source_dest = onodes[i]; 7538 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7539 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7540 ptr_idxs += olengths_idxs[i]; 7541 ptr_vals += olengths_vals[i]; 7542 if (nis) { 7543 source_dest = onodes_is[i]; 7544 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7545 ptr_idxs_is += olengths_idxs_is[i]; 7546 } 7547 if (nvecs) { 7548 source_dest = onodes[i]; 7549 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7550 ptr_vecs += olengths_idxs[i]-2; 7551 } 7552 } 7553 for (i=0;i<n_sends;i++) { 7554 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7555 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7556 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7557 if (nis) { 7558 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7559 } 7560 if (nvecs) { 7561 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7562 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7563 } 7564 } 7565 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7566 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7567 7568 /* assemble new l2g map */ 7569 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7570 ptr_idxs = recv_buffer_idxs; 7571 new_local_rows = 0; 7572 for (i=0;i<n_recvs;i++) { 7573 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7574 ptr_idxs += olengths_idxs[i]; 7575 } 7576 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7577 ptr_idxs = recv_buffer_idxs; 7578 new_local_rows = 0; 7579 for (i=0;i<n_recvs;i++) { 7580 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7581 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7582 ptr_idxs += olengths_idxs[i]; 7583 } 7584 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7585 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7586 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7587 7588 /* infer new local matrix type from received local matrices type */ 7589 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7590 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7591 if (n_recvs) { 7592 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7593 ptr_idxs = recv_buffer_idxs; 7594 for (i=0;i<n_recvs;i++) { 7595 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7596 new_local_type_private = MATAIJ_PRIVATE; 7597 break; 7598 } 7599 ptr_idxs += olengths_idxs[i]; 7600 } 7601 switch (new_local_type_private) { 7602 case MATDENSE_PRIVATE: 7603 new_local_type = MATSEQAIJ; 7604 bs = 1; 7605 break; 7606 case MATAIJ_PRIVATE: 7607 new_local_type = MATSEQAIJ; 7608 bs = 1; 7609 break; 7610 case MATBAIJ_PRIVATE: 7611 new_local_type = MATSEQBAIJ; 7612 break; 7613 case MATSBAIJ_PRIVATE: 7614 new_local_type = MATSEQSBAIJ; 7615 break; 7616 default: 7617 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7618 break; 7619 } 7620 } else { /* by default, new_local_type is seqaij */ 7621 new_local_type = MATSEQAIJ; 7622 bs = 1; 7623 } 7624 7625 /* create MATIS object if needed */ 7626 if (!reuse) { 7627 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7628 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7629 } else { 7630 /* it also destroys the local matrices */ 7631 if (*mat_n) { 7632 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7633 } else { /* this is a fake object */ 7634 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7635 } 7636 } 7637 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7638 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7639 7640 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7641 7642 /* Global to local map of received indices */ 7643 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7644 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7645 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7646 7647 /* restore attributes -> type of incoming data and its size */ 7648 buf_size_idxs = 0; 7649 for (i=0;i<n_recvs;i++) { 7650 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7651 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7652 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7653 } 7654 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7655 7656 /* set preallocation */ 7657 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7658 if (!newisdense) { 7659 PetscInt *new_local_nnz=0; 7660 7661 ptr_idxs = recv_buffer_idxs_local; 7662 if (n_recvs) { 7663 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7664 } 7665 for (i=0;i<n_recvs;i++) { 7666 PetscInt j; 7667 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7668 for (j=0;j<*(ptr_idxs+1);j++) { 7669 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7670 } 7671 } else { 7672 /* TODO */ 7673 } 7674 ptr_idxs += olengths_idxs[i]; 7675 } 7676 if (new_local_nnz) { 7677 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7678 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7679 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7680 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7681 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7682 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7683 } else { 7684 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7685 } 7686 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7687 } else { 7688 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7689 } 7690 7691 /* set values */ 7692 ptr_vals = recv_buffer_vals; 7693 ptr_idxs = recv_buffer_idxs_local; 7694 for (i=0;i<n_recvs;i++) { 7695 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7696 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7697 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7698 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7699 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7700 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7701 } else { 7702 /* TODO */ 7703 } 7704 ptr_idxs += olengths_idxs[i]; 7705 ptr_vals += olengths_vals[i]; 7706 } 7707 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7708 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7709 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7710 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7711 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7712 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7713 7714 #if 0 7715 if (!restrict_comm) { /* check */ 7716 Vec lvec,rvec; 7717 PetscReal infty_error; 7718 7719 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7720 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7721 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7722 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7723 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7724 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7725 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7726 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7727 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7728 } 7729 #endif 7730 7731 /* assemble new additional is (if any) */ 7732 if (nis) { 7733 PetscInt **temp_idxs,*count_is,j,psum; 7734 7735 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7736 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7737 ptr_idxs = recv_buffer_idxs_is; 7738 psum = 0; 7739 for (i=0;i<n_recvs;i++) { 7740 for (j=0;j<nis;j++) { 7741 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7742 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7743 psum += plen; 7744 ptr_idxs += plen+1; /* shift pointer to received data */ 7745 } 7746 } 7747 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7748 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7749 for (i=1;i<nis;i++) { 7750 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7751 } 7752 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7753 ptr_idxs = recv_buffer_idxs_is; 7754 for (i=0;i<n_recvs;i++) { 7755 for (j=0;j<nis;j++) { 7756 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7757 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7758 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7759 ptr_idxs += plen+1; /* shift pointer to received data */ 7760 } 7761 } 7762 for (i=0;i<nis;i++) { 7763 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7764 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7765 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7766 } 7767 ierr = PetscFree(count_is);CHKERRQ(ierr); 7768 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7769 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7770 } 7771 /* free workspace */ 7772 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7773 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7774 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7775 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7776 if (isdense) { 7777 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7778 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7779 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7780 } else { 7781 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7782 } 7783 if (nis) { 7784 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7785 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7786 } 7787 7788 if (nvecs) { 7789 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7790 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7791 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7792 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7793 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7794 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7795 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7796 /* set values */ 7797 ptr_vals = recv_buffer_vecs; 7798 ptr_idxs = recv_buffer_idxs_local; 7799 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7800 for (i=0;i<n_recvs;i++) { 7801 PetscInt j; 7802 for (j=0;j<*(ptr_idxs+1);j++) { 7803 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7804 } 7805 ptr_idxs += olengths_idxs[i]; 7806 ptr_vals += olengths_idxs[i]-2; 7807 } 7808 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7809 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7810 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7811 } 7812 7813 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7814 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7815 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7816 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7817 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7818 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7819 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7820 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7821 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7822 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7823 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7824 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7825 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7826 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7827 ierr = PetscFree(onodes);CHKERRQ(ierr); 7828 if (nis) { 7829 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7830 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7831 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7832 } 7833 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7834 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7835 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7836 for (i=0;i<nis;i++) { 7837 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7838 } 7839 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7840 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7841 } 7842 *mat_n = NULL; 7843 } 7844 PetscFunctionReturn(0); 7845 } 7846 7847 /* temporary hack into ksp private data structure */ 7848 #include <petsc/private/kspimpl.h> 7849 7850 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7851 { 7852 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7853 PC_IS *pcis = (PC_IS*)pc->data; 7854 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7855 Mat coarsedivudotp = NULL; 7856 Mat coarseG,t_coarse_mat_is; 7857 MatNullSpace CoarseNullSpace = NULL; 7858 ISLocalToGlobalMapping coarse_islg; 7859 IS coarse_is,*isarray; 7860 PetscInt i,im_active=-1,active_procs=-1; 7861 PetscInt nis,nisdofs,nisneu,nisvert; 7862 PetscInt coarse_eqs_per_proc; 7863 PC pc_temp; 7864 PCType coarse_pc_type; 7865 KSPType coarse_ksp_type; 7866 PetscBool multilevel_requested,multilevel_allowed; 7867 PetscBool coarse_reuse; 7868 PetscInt ncoarse,nedcfield; 7869 PetscBool compute_vecs = PETSC_FALSE; 7870 PetscScalar *array; 7871 MatReuse coarse_mat_reuse; 7872 PetscBool restr, full_restr, have_void; 7873 PetscMPIInt size; 7874 PetscErrorCode ierr; 7875 7876 PetscFunctionBegin; 7877 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 7878 /* Assign global numbering to coarse dofs */ 7879 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7880 PetscInt ocoarse_size; 7881 compute_vecs = PETSC_TRUE; 7882 7883 pcbddc->new_primal_space = PETSC_TRUE; 7884 ocoarse_size = pcbddc->coarse_size; 7885 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7886 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7887 /* see if we can avoid some work */ 7888 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7889 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7890 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7891 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7892 coarse_reuse = PETSC_FALSE; 7893 } else { /* we can safely reuse already computed coarse matrix */ 7894 coarse_reuse = PETSC_TRUE; 7895 } 7896 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7897 coarse_reuse = PETSC_FALSE; 7898 } 7899 /* reset any subassembling information */ 7900 if (!coarse_reuse || pcbddc->recompute_topography) { 7901 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7902 } 7903 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7904 coarse_reuse = PETSC_TRUE; 7905 } 7906 /* assemble coarse matrix */ 7907 if (coarse_reuse && pcbddc->coarse_ksp) { 7908 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7909 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7910 coarse_mat_reuse = MAT_REUSE_MATRIX; 7911 } else { 7912 coarse_mat = NULL; 7913 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7914 } 7915 7916 /* creates temporary l2gmap and IS for coarse indexes */ 7917 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7918 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7919 7920 /* creates temporary MATIS object for coarse matrix */ 7921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7922 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7923 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7924 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7925 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7926 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7927 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7928 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7929 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7930 7931 /* count "active" (i.e. with positive local size) and "void" processes */ 7932 im_active = !!(pcis->n); 7933 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7934 7935 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7936 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7937 /* full_restr : just use the receivers from the subassembling pattern */ 7938 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7939 coarse_mat_is = NULL; 7940 multilevel_allowed = PETSC_FALSE; 7941 multilevel_requested = PETSC_FALSE; 7942 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7943 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7944 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7945 if (multilevel_requested) { 7946 ncoarse = active_procs/pcbddc->coarsening_ratio; 7947 restr = PETSC_FALSE; 7948 full_restr = PETSC_FALSE; 7949 } else { 7950 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 7951 restr = PETSC_TRUE; 7952 full_restr = PETSC_TRUE; 7953 } 7954 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7955 ncoarse = PetscMax(1,ncoarse); 7956 if (!pcbddc->coarse_subassembling) { 7957 if (pcbddc->coarsening_ratio > 1) { 7958 if (multilevel_requested) { 7959 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7960 } else { 7961 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7962 } 7963 } else { 7964 PetscMPIInt rank; 7965 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7966 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7967 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7968 } 7969 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7970 PetscInt psum; 7971 if (pcbddc->coarse_ksp) psum = 1; 7972 else psum = 0; 7973 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7974 have_void = ncoarse < size ? PETSC_TRUE : PETSC_FALSE; 7975 } 7976 /* determine if we can go multilevel */ 7977 if (multilevel_requested) { 7978 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7979 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7980 } 7981 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7982 7983 /* dump subassembling pattern */ 7984 if (pcbddc->dbg_flag && multilevel_allowed) { 7985 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7986 } 7987 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7988 nedcfield = -1; 7989 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7990 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7991 const PetscInt *idxs; 7992 ISLocalToGlobalMapping tmap; 7993 7994 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7995 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7996 /* allocate space for temporary storage */ 7997 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7998 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7999 /* allocate for IS array */ 8000 nisdofs = pcbddc->n_ISForDofsLocal; 8001 if (pcbddc->nedclocal) { 8002 if (pcbddc->nedfield > -1) { 8003 nedcfield = pcbddc->nedfield; 8004 } else { 8005 nedcfield = 0; 8006 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 8007 nisdofs = 1; 8008 } 8009 } 8010 nisneu = !!pcbddc->NeumannBoundariesLocal; 8011 nisvert = 0; /* nisvert is not used */ 8012 nis = nisdofs + nisneu + nisvert; 8013 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 8014 /* dofs splitting */ 8015 for (i=0;i<nisdofs;i++) { 8016 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 8017 if (nedcfield != i) { 8018 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 8019 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8020 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8021 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 8022 } else { 8023 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 8024 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8025 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8026 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 8027 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 8028 } 8029 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8030 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 8031 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 8032 } 8033 /* neumann boundaries */ 8034 if (pcbddc->NeumannBoundariesLocal) { 8035 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 8036 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 8037 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8038 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 8039 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 8040 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 8041 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8042 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8043 } 8044 /* free memory */ 8045 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8046 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8047 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8048 } else { 8049 nis = 0; 8050 nisdofs = 0; 8051 nisneu = 0; 8052 nisvert = 0; 8053 isarray = NULL; 8054 } 8055 /* destroy no longer needed map */ 8056 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8057 8058 /* subassemble */ 8059 if (multilevel_allowed) { 8060 Vec vp[1]; 8061 PetscInt nvecs = 0; 8062 PetscBool reuse,reuser; 8063 8064 if (coarse_mat) reuse = PETSC_TRUE; 8065 else reuse = PETSC_FALSE; 8066 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8067 vp[0] = NULL; 8068 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8069 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8070 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8071 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8072 nvecs = 1; 8073 8074 if (pcbddc->divudotp) { 8075 Mat B,loc_divudotp; 8076 Vec v,p; 8077 IS dummy; 8078 PetscInt np; 8079 8080 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8081 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8082 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8083 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8084 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8085 ierr = VecSet(p,1.);CHKERRQ(ierr); 8086 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8087 ierr = VecDestroy(&p);CHKERRQ(ierr); 8088 ierr = MatDestroy(&B);CHKERRQ(ierr); 8089 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8090 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8091 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8092 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8093 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8094 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8095 ierr = VecDestroy(&v);CHKERRQ(ierr); 8096 } 8097 } 8098 if (reuser) { 8099 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8100 } else { 8101 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8102 } 8103 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8104 PetscScalar *arraym,*arrayv; 8105 PetscInt nl; 8106 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8107 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8108 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8109 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8110 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8111 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8112 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8113 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8114 } else { 8115 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8116 } 8117 } else { 8118 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8119 } 8120 if (coarse_mat_is || coarse_mat) { 8121 if (!multilevel_allowed) { 8122 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8123 } else { 8124 Mat A; 8125 8126 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8127 if (coarse_mat_is) { 8128 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8129 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8130 coarse_mat = coarse_mat_is; 8131 } 8132 /* be sure we don't have MatSeqDENSE as local mat */ 8133 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8134 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8135 } 8136 } 8137 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8138 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8139 8140 /* create local to global scatters for coarse problem */ 8141 if (compute_vecs) { 8142 PetscInt lrows; 8143 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8144 if (coarse_mat) { 8145 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8146 } else { 8147 lrows = 0; 8148 } 8149 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8150 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8151 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8152 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8153 ierr = VecScatterCreateWithData(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8154 } 8155 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8156 8157 /* set defaults for coarse KSP and PC */ 8158 if (multilevel_allowed) { 8159 coarse_ksp_type = KSPRICHARDSON; 8160 coarse_pc_type = PCBDDC; 8161 } else { 8162 coarse_ksp_type = KSPPREONLY; 8163 coarse_pc_type = PCREDUNDANT; 8164 } 8165 8166 /* print some info if requested */ 8167 if (pcbddc->dbg_flag) { 8168 if (!multilevel_allowed) { 8169 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8170 if (multilevel_requested) { 8171 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8172 } else if (pcbddc->max_levels) { 8173 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8174 } 8175 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8176 } 8177 } 8178 8179 /* communicate coarse discrete gradient */ 8180 coarseG = NULL; 8181 if (pcbddc->nedcG && multilevel_allowed) { 8182 MPI_Comm ccomm; 8183 if (coarse_mat) { 8184 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8185 } else { 8186 ccomm = MPI_COMM_NULL; 8187 } 8188 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8189 } 8190 8191 /* create the coarse KSP object only once with defaults */ 8192 if (coarse_mat) { 8193 PetscBool isredundant,isnn,isbddc; 8194 PetscViewer dbg_viewer = NULL; 8195 8196 if (pcbddc->dbg_flag) { 8197 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8198 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8199 } 8200 if (!pcbddc->coarse_ksp) { 8201 char prefix[256],str_level[16]; 8202 size_t len; 8203 8204 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8205 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8206 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8207 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8208 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8209 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8210 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8211 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8212 /* TODO is this logic correct? should check for coarse_mat type */ 8213 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8214 /* prefix */ 8215 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8216 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8217 if (!pcbddc->current_level) { 8218 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8219 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8220 } else { 8221 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8222 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8223 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8224 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8225 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8226 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8227 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8228 } 8229 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8230 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8231 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8232 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8233 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8234 /* allow user customization */ 8235 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8236 /* get some info after set from options */ 8237 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8238 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8239 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8240 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8241 if (multilevel_allowed && !isbddc && !isnn) { 8242 isbddc = PETSC_TRUE; 8243 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8244 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8245 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8246 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8247 } 8248 } 8249 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8250 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8251 if (nisdofs) { 8252 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8253 for (i=0;i<nisdofs;i++) { 8254 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8255 } 8256 } 8257 if (nisneu) { 8258 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8259 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8260 } 8261 if (nisvert) { 8262 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8263 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8264 } 8265 if (coarseG) { 8266 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8267 } 8268 8269 /* get some info after set from options */ 8270 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8271 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8272 if (isbddc && !multilevel_allowed) { 8273 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8274 isbddc = PETSC_FALSE; 8275 } 8276 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8277 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8278 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8279 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8280 isbddc = PETSC_TRUE; 8281 } 8282 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8283 if (isredundant) { 8284 KSP inner_ksp; 8285 PC inner_pc; 8286 8287 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8288 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8289 } 8290 8291 /* parameters which miss an API */ 8292 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8293 if (isbddc) { 8294 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8295 8296 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8297 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8298 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8299 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8300 if (pcbddc_coarse->benign_saddle_point) { 8301 Mat coarsedivudotp_is; 8302 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8303 IS row,col; 8304 const PetscInt *gidxs; 8305 PetscInt n,st,M,N; 8306 8307 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8308 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8309 st = st-n; 8310 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8311 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8312 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8313 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8314 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8315 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8316 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8317 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8318 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8319 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8320 ierr = ISDestroy(&row);CHKERRQ(ierr); 8321 ierr = ISDestroy(&col);CHKERRQ(ierr); 8322 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8323 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8324 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8325 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8326 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8327 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8328 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8329 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8330 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8331 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8332 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8333 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8334 } 8335 } 8336 8337 /* propagate symmetry info of coarse matrix */ 8338 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8339 if (pc->pmat->symmetric_set) { 8340 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8341 } 8342 if (pc->pmat->hermitian_set) { 8343 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8344 } 8345 if (pc->pmat->spd_set) { 8346 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8347 } 8348 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8349 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8350 } 8351 /* set operators */ 8352 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8353 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8354 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8355 if (pcbddc->dbg_flag) { 8356 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8357 } 8358 } 8359 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8360 ierr = PetscFree(isarray);CHKERRQ(ierr); 8361 #if 0 8362 { 8363 PetscViewer viewer; 8364 char filename[256]; 8365 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8366 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8367 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8368 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8369 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8370 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8371 } 8372 #endif 8373 8374 if (pcbddc->coarse_ksp) { 8375 Vec crhs,csol; 8376 8377 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8378 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8379 if (!csol) { 8380 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8381 } 8382 if (!crhs) { 8383 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8384 } 8385 } 8386 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8387 8388 /* compute null space for coarse solver if the benign trick has been requested */ 8389 if (pcbddc->benign_null) { 8390 8391 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8392 for (i=0;i<pcbddc->benign_n;i++) { 8393 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8394 } 8395 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8396 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8397 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8398 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8399 if (coarse_mat) { 8400 Vec nullv; 8401 PetscScalar *array,*array2; 8402 PetscInt nl; 8403 8404 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8405 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8406 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8407 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8408 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8409 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8410 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8411 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8412 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8413 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8414 } 8415 } 8416 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8417 8418 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8419 if (pcbddc->coarse_ksp) { 8420 PetscBool ispreonly; 8421 8422 if (CoarseNullSpace) { 8423 PetscBool isnull; 8424 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8425 if (isnull) { 8426 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8427 } 8428 /* TODO: add local nullspaces (if any) */ 8429 } 8430 /* setup coarse ksp */ 8431 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8432 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8433 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8434 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8435 KSP check_ksp; 8436 KSPType check_ksp_type; 8437 PC check_pc; 8438 Vec check_vec,coarse_vec; 8439 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8440 PetscInt its; 8441 PetscBool compute_eigs; 8442 PetscReal *eigs_r,*eigs_c; 8443 PetscInt neigs; 8444 const char *prefix; 8445 8446 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8447 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8448 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8449 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8450 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8451 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8452 /* prevent from setup unneeded object */ 8453 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8454 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8455 if (ispreonly) { 8456 check_ksp_type = KSPPREONLY; 8457 compute_eigs = PETSC_FALSE; 8458 } else { 8459 check_ksp_type = KSPGMRES; 8460 compute_eigs = PETSC_TRUE; 8461 } 8462 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8463 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8464 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8465 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8466 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8467 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8468 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8469 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8470 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8471 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8472 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8473 /* create random vec */ 8474 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8475 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8476 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8477 /* solve coarse problem */ 8478 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8479 ierr = KSPCheckSolve(check_ksp,pc,coarse_vec);CHKERRQ(ierr); 8480 /* set eigenvalue estimation if preonly has not been requested */ 8481 if (compute_eigs) { 8482 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8483 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8484 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8485 if (neigs) { 8486 lambda_max = eigs_r[neigs-1]; 8487 lambda_min = eigs_r[0]; 8488 if (pcbddc->use_coarse_estimates) { 8489 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8490 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8491 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8492 } 8493 } 8494 } 8495 } 8496 8497 /* check coarse problem residual error */ 8498 if (pcbddc->dbg_flag) { 8499 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8500 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8501 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8502 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8503 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8504 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8505 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8506 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8507 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8508 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8509 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8510 if (CoarseNullSpace) { 8511 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8512 } 8513 if (compute_eigs) { 8514 PetscReal lambda_max_s,lambda_min_s; 8515 KSPConvergedReason reason; 8516 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8517 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8518 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8519 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8520 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8521 for (i=0;i<neigs;i++) { 8522 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8523 } 8524 } 8525 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8526 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8527 } 8528 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8529 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8530 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8531 if (compute_eigs) { 8532 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8533 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8534 } 8535 } 8536 } 8537 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8538 /* print additional info */ 8539 if (pcbddc->dbg_flag) { 8540 /* waits until all processes reaches this point */ 8541 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8542 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8543 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8544 } 8545 8546 /* free memory */ 8547 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8548 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8549 PetscFunctionReturn(0); 8550 } 8551 8552 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8553 { 8554 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8555 PC_IS* pcis = (PC_IS*)pc->data; 8556 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8557 IS subset,subset_mult,subset_n; 8558 PetscInt local_size,coarse_size=0; 8559 PetscInt *local_primal_indices=NULL; 8560 const PetscInt *t_local_primal_indices; 8561 PetscErrorCode ierr; 8562 8563 PetscFunctionBegin; 8564 /* Compute global number of coarse dofs */ 8565 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8566 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8567 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8568 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8569 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8570 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8571 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8572 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8573 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8574 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8575 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8576 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8577 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8578 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8579 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8580 8581 /* check numbering */ 8582 if (pcbddc->dbg_flag) { 8583 PetscScalar coarsesum,*array,*array2; 8584 PetscInt i; 8585 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8586 8587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8588 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8590 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8591 /* counter */ 8592 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8593 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8594 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8595 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8596 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8597 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8598 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8599 for (i=0;i<pcbddc->local_primal_size;i++) { 8600 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8601 } 8602 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8603 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8604 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8605 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8606 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8607 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8608 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8609 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8610 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8611 for (i=0;i<pcis->n;i++) { 8612 if (array[i] != 0.0 && array[i] != array2[i]) { 8613 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8614 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8615 set_error = PETSC_TRUE; 8616 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8617 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8618 } 8619 } 8620 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8621 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8622 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8623 for (i=0;i<pcis->n;i++) { 8624 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8625 } 8626 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8627 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8628 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8629 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8630 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8631 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8632 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8633 PetscInt *gidxs; 8634 8635 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8636 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8637 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8638 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8639 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8640 for (i=0;i<pcbddc->local_primal_size;i++) { 8641 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8642 } 8643 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8644 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8645 } 8646 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8647 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8648 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8649 } 8650 8651 /* get back data */ 8652 *coarse_size_n = coarse_size; 8653 *local_primal_indices_n = local_primal_indices; 8654 PetscFunctionReturn(0); 8655 } 8656 8657 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8658 { 8659 IS localis_t; 8660 PetscInt i,lsize,*idxs,n; 8661 PetscScalar *vals; 8662 PetscErrorCode ierr; 8663 8664 PetscFunctionBegin; 8665 /* get indices in local ordering exploiting local to global map */ 8666 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8667 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8668 for (i=0;i<lsize;i++) vals[i] = 1.0; 8669 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8670 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8671 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8672 if (idxs) { /* multilevel guard */ 8673 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8674 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8675 } 8676 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8677 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8678 ierr = PetscFree(vals);CHKERRQ(ierr); 8679 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8680 /* now compute set in local ordering */ 8681 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8682 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8683 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8684 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8685 for (i=0,lsize=0;i<n;i++) { 8686 if (PetscRealPart(vals[i]) > 0.5) { 8687 lsize++; 8688 } 8689 } 8690 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8691 for (i=0,lsize=0;i<n;i++) { 8692 if (PetscRealPart(vals[i]) > 0.5) { 8693 idxs[lsize++] = i; 8694 } 8695 } 8696 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8697 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8698 *localis = localis_t; 8699 PetscFunctionReturn(0); 8700 } 8701 8702 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8703 { 8704 PC_IS *pcis=(PC_IS*)pc->data; 8705 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8706 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8707 Mat S_j; 8708 PetscInt *used_xadj,*used_adjncy; 8709 PetscBool free_used_adj; 8710 PetscErrorCode ierr; 8711 8712 PetscFunctionBegin; 8713 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8714 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8715 free_used_adj = PETSC_FALSE; 8716 if (pcbddc->sub_schurs_layers == -1) { 8717 used_xadj = NULL; 8718 used_adjncy = NULL; 8719 } else { 8720 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8721 used_xadj = pcbddc->mat_graph->xadj; 8722 used_adjncy = pcbddc->mat_graph->adjncy; 8723 } else if (pcbddc->computed_rowadj) { 8724 used_xadj = pcbddc->mat_graph->xadj; 8725 used_adjncy = pcbddc->mat_graph->adjncy; 8726 } else { 8727 PetscBool flg_row=PETSC_FALSE; 8728 const PetscInt *xadj,*adjncy; 8729 PetscInt nvtxs; 8730 8731 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8732 if (flg_row) { 8733 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8734 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8735 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8736 free_used_adj = PETSC_TRUE; 8737 } else { 8738 pcbddc->sub_schurs_layers = -1; 8739 used_xadj = NULL; 8740 used_adjncy = NULL; 8741 } 8742 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8743 } 8744 } 8745 8746 /* setup sub_schurs data */ 8747 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8748 if (!sub_schurs->schur_explicit) { 8749 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8750 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8751 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8752 } else { 8753 Mat change = NULL; 8754 Vec scaling = NULL; 8755 IS change_primal = NULL, iP; 8756 PetscInt benign_n; 8757 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8758 PetscBool isseqaij,need_change = PETSC_FALSE; 8759 PetscBool discrete_harmonic = PETSC_FALSE; 8760 8761 if (!pcbddc->use_vertices && reuse_solvers) { 8762 PetscInt n_vertices; 8763 8764 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8765 reuse_solvers = (PetscBool)!n_vertices; 8766 } 8767 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8768 if (!isseqaij) { 8769 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8770 if (matis->A == pcbddc->local_mat) { 8771 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8772 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8773 } else { 8774 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8775 } 8776 } 8777 if (!pcbddc->benign_change_explicit) { 8778 benign_n = pcbddc->benign_n; 8779 } else { 8780 benign_n = 0; 8781 } 8782 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8783 We need a global reduction to avoid possible deadlocks. 8784 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8785 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8786 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8787 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8788 need_change = (PetscBool)(!need_change); 8789 } 8790 /* If the user defines additional constraints, we import them here. 8791 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8792 if (need_change) { 8793 PC_IS *pcisf; 8794 PC_BDDC *pcbddcf; 8795 PC pcf; 8796 8797 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8798 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8799 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8800 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8801 8802 /* hacks */ 8803 pcisf = (PC_IS*)pcf->data; 8804 pcisf->is_B_local = pcis->is_B_local; 8805 pcisf->vec1_N = pcis->vec1_N; 8806 pcisf->BtoNmap = pcis->BtoNmap; 8807 pcisf->n = pcis->n; 8808 pcisf->n_B = pcis->n_B; 8809 pcbddcf = (PC_BDDC*)pcf->data; 8810 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8811 pcbddcf->mat_graph = pcbddc->mat_graph; 8812 pcbddcf->use_faces = PETSC_TRUE; 8813 pcbddcf->use_change_of_basis = PETSC_TRUE; 8814 pcbddcf->use_change_on_faces = PETSC_TRUE; 8815 pcbddcf->use_qr_single = PETSC_TRUE; 8816 pcbddcf->fake_change = PETSC_TRUE; 8817 8818 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8819 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8820 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8821 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8822 change = pcbddcf->ConstraintMatrix; 8823 pcbddcf->ConstraintMatrix = NULL; 8824 8825 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8826 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8827 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8828 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8829 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8830 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8831 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8832 pcf->ops->destroy = NULL; 8833 pcf->ops->reset = NULL; 8834 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8835 } 8836 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8837 8838 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8839 if (iP) { 8840 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8841 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8842 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8843 } 8844 if (discrete_harmonic) { 8845 Mat A; 8846 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8847 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8848 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8849 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8850 ierr = MatDestroy(&A);CHKERRQ(ierr); 8851 } else { 8852 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8853 } 8854 ierr = MatDestroy(&change);CHKERRQ(ierr); 8855 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8856 } 8857 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8858 8859 /* free adjacency */ 8860 if (free_used_adj) { 8861 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8862 } 8863 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8864 PetscFunctionReturn(0); 8865 } 8866 8867 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8868 { 8869 PC_IS *pcis=(PC_IS*)pc->data; 8870 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8871 PCBDDCGraph graph; 8872 PetscErrorCode ierr; 8873 8874 PetscFunctionBegin; 8875 /* attach interface graph for determining subsets */ 8876 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8877 IS verticesIS,verticescomm; 8878 PetscInt vsize,*idxs; 8879 8880 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8881 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8882 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8883 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8884 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8885 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8886 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8887 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8888 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8889 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8890 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8891 } else { 8892 graph = pcbddc->mat_graph; 8893 } 8894 /* print some info */ 8895 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8896 IS vertices; 8897 PetscInt nv,nedges,nfaces; 8898 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8899 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8900 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8901 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8902 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8903 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 8904 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 8905 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8906 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8907 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8908 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8909 } 8910 8911 /* sub_schurs init */ 8912 if (!pcbddc->sub_schurs) { 8913 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8914 } 8915 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8916 8917 /* free graph struct */ 8918 if (pcbddc->sub_schurs_rebuild) { 8919 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8920 } 8921 PetscFunctionReturn(0); 8922 } 8923 8924 PetscErrorCode PCBDDCCheckOperator(PC pc) 8925 { 8926 PC_IS *pcis=(PC_IS*)pc->data; 8927 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8928 PetscErrorCode ierr; 8929 8930 PetscFunctionBegin; 8931 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8932 IS zerodiag = NULL; 8933 Mat S_j,B0_B=NULL; 8934 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8935 PetscScalar *p0_check,*array,*array2; 8936 PetscReal norm; 8937 PetscInt i; 8938 8939 /* B0 and B0_B */ 8940 if (zerodiag) { 8941 IS dummy; 8942 8943 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8944 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8945 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8946 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8947 } 8948 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8949 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8950 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8951 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8952 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8953 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8954 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8955 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8956 /* S_j */ 8957 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8958 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8959 8960 /* mimic vector in \widetilde{W}_\Gamma */ 8961 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8962 /* continuous in primal space */ 8963 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8964 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8965 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8966 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8967 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8968 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8969 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8970 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8971 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8972 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8973 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8974 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8975 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8976 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8977 8978 /* assemble rhs for coarse problem */ 8979 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8980 /* local with Schur */ 8981 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8982 if (zerodiag) { 8983 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8984 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8985 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8986 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8987 } 8988 /* sum on primal nodes the local contributions */ 8989 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8990 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8991 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8992 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8993 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8994 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8995 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8996 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8997 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8998 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8999 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9000 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 9001 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9002 /* scale primal nodes (BDDC sums contibutions) */ 9003 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 9004 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 9005 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 9006 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 9007 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 9008 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9009 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 9010 /* global: \widetilde{B0}_B w_\Gamma */ 9011 if (zerodiag) { 9012 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 9013 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 9014 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 9015 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 9016 } 9017 /* BDDC */ 9018 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 9019 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 9020 9021 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 9022 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 9023 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 9024 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 9025 for (i=0;i<pcbddc->benign_n;i++) { 9026 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 9027 } 9028 ierr = PetscFree(p0_check);CHKERRQ(ierr); 9029 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 9030 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 9031 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 9032 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 9033 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 9034 } 9035 PetscFunctionReturn(0); 9036 } 9037 9038 #include <../src/mat/impls/aij/mpi/mpiaij.h> 9039 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 9040 { 9041 Mat At; 9042 IS rows; 9043 PetscInt rst,ren; 9044 PetscErrorCode ierr; 9045 PetscLayout rmap; 9046 9047 PetscFunctionBegin; 9048 rst = ren = 0; 9049 if (ccomm != MPI_COMM_NULL) { 9050 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 9051 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 9052 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 9053 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 9054 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 9055 } 9056 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 9057 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 9058 ierr = ISDestroy(&rows);CHKERRQ(ierr); 9059 9060 if (ccomm != MPI_COMM_NULL) { 9061 Mat_MPIAIJ *a,*b; 9062 IS from,to; 9063 Vec gvec; 9064 PetscInt lsize; 9065 9066 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9067 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9068 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9069 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9070 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9071 a = (Mat_MPIAIJ*)At->data; 9072 b = (Mat_MPIAIJ*)(*B)->data; 9073 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9074 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9075 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9076 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9077 b->A = a->A; 9078 b->B = a->B; 9079 9080 b->donotstash = a->donotstash; 9081 b->roworiented = a->roworiented; 9082 b->rowindices = 0; 9083 b->rowvalues = 0; 9084 b->getrowactive = PETSC_FALSE; 9085 9086 (*B)->rmap = rmap; 9087 (*B)->factortype = A->factortype; 9088 (*B)->assembled = PETSC_TRUE; 9089 (*B)->insertmode = NOT_SET_VALUES; 9090 (*B)->preallocated = PETSC_TRUE; 9091 9092 if (a->colmap) { 9093 #if defined(PETSC_USE_CTABLE) 9094 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9095 #else 9096 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9097 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9098 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9099 #endif 9100 } else b->colmap = 0; 9101 if (a->garray) { 9102 PetscInt len; 9103 len = a->B->cmap->n; 9104 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9105 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9106 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9107 } else b->garray = 0; 9108 9109 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9110 b->lvec = a->lvec; 9111 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9112 9113 /* cannot use VecScatterCopy */ 9114 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9115 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9116 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9117 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9118 ierr = VecScatterCreateWithData(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9119 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9120 ierr = ISDestroy(&from);CHKERRQ(ierr); 9121 ierr = ISDestroy(&to);CHKERRQ(ierr); 9122 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9123 } 9124 ierr = MatDestroy(&At);CHKERRQ(ierr); 9125 PetscFunctionReturn(0); 9126 } 9127