1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %D: number of fields is %D",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %D it's not a multiple of the order %D",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = ISLocalToGlobalMappingGetNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 457 ierr = ISLocalToGlobalMappingGetNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 458 459 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 460 for proper detection of coarse edges' endpoints */ 461 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 462 for (i=0;i<ne;i++) { 463 if ((ecount[i] > 2 && !PetscBTLookup(btbd,i)) || (ecount[i] == 2 && PetscBTLookup(btb,i))) { 464 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 465 } 466 } 467 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 468 if (!conforming) { 469 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 470 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 471 } 472 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 473 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 474 cum = 0; 475 for (i=0;i<ne;i++) { 476 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 477 if (!PetscBTLookup(btee,i)) { 478 marks[cum++] = i; 479 continue; 480 } 481 /* set badly connected edge dofs as primal */ 482 if (!conforming) { 483 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 484 marks[cum++] = i; 485 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 486 for (j=ii[i];j<ii[i+1];j++) { 487 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 488 } 489 } else { 490 /* every edge dofs should be connected trough a certain number of nodal dofs 491 to other edge dofs belonging to coarse edges 492 - at most 2 endpoints 493 - order-1 interior nodal dofs 494 - no undefined nodal dofs (nconn < order) 495 */ 496 PetscInt ends = 0,ints = 0, undef = 0; 497 for (j=ii[i];j<ii[i+1];j++) { 498 PetscInt v = jj[j],k; 499 PetscInt nconn = iit[v+1]-iit[v]; 500 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 501 if (nconn > order) ends++; 502 else if (nconn == order) ints++; 503 else undef++; 504 } 505 if (undef || ends > 2 || ints != order -1) { 506 marks[cum++] = i; 507 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 508 for (j=ii[i];j<ii[i+1];j++) { 509 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 510 } 511 } 512 } 513 } 514 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 515 if (!order && ii[i+1] != ii[i]) { 516 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 517 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 518 } 519 } 520 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 521 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 522 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 523 if (!conforming) { 524 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 525 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 526 } 527 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 528 529 /* identify splitpoints and corner candidates */ 530 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 531 if (print) { 532 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 533 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 534 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 535 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 536 } 537 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 538 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 539 for (i=0;i<nv;i++) { 540 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 541 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 542 if (!order) { /* variable order */ 543 PetscReal vorder = 0.; 544 545 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 546 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 547 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%D)",vorder,test); 548 ord = 1; 549 } 550 #if defined(PETSC_USE_DEBUG) 551 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %D connected with nodal dof %D with order %D",test,i,ord); 552 #endif 553 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 554 if (PetscBTLookup(btbd,jj[j])) { 555 bdir = PETSC_TRUE; 556 break; 557 } 558 if (vc != ecount[jj[j]]) { 559 sneighs = PETSC_FALSE; 560 } else { 561 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 562 for (k=0;k<vc;k++) { 563 if (vn[k] != en[k]) { 564 sneighs = PETSC_FALSE; 565 break; 566 } 567 } 568 } 569 } 570 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 571 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %D (%D %D %D)\n",i,!sneighs,test >= 3*ord,bdir); 572 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 573 } else if (test == ord) { 574 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 575 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %D\n",i); 576 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 577 } else { 578 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %D\n",i); 579 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 580 } 581 } 582 } 583 ierr = ISLocalToGlobalMappingRestoreNodeInfo(el2g,NULL,&ecount,&eneighs);CHKERRQ(ierr); 584 ierr = ISLocalToGlobalMappingRestoreNodeInfo(vl2g,NULL,&vcount,&vneighs);CHKERRQ(ierr); 585 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 586 587 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 588 if (order != 1) { 589 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 590 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 591 for (i=0;i<nv;i++) { 592 if (PetscBTLookup(btvcand,i)) { 593 PetscBool found = PETSC_FALSE; 594 for (j=ii[i];j<ii[i+1] && !found;j++) { 595 PetscInt k,e = jj[j]; 596 if (PetscBTLookup(bte,e)) continue; 597 for (k=iit[e];k<iit[e+1];k++) { 598 PetscInt v = jjt[k]; 599 if (v != i && PetscBTLookup(btvcand,v)) { 600 found = PETSC_TRUE; 601 break; 602 } 603 } 604 } 605 if (!found) { 606 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D CLEARED\n",i); 607 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 608 } else { 609 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %D ACCEPTED\n",i); 610 } 611 } 612 } 613 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 614 } 615 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 616 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 617 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 618 619 /* Get the local G^T explicitly */ 620 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 621 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 622 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 623 624 /* Mark interior nodal dofs */ 625 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 626 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 627 for (i=1;i<n_neigh;i++) { 628 for (j=0;j<n_shared[i];j++) { 629 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 630 } 631 } 632 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 633 634 /* communicate corners and splitpoints */ 635 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 636 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 637 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 638 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 639 640 if (print) { 641 IS tbz; 642 643 cum = 0; 644 for (i=0;i<nv;i++) 645 if (sfvleaves[i]) 646 vmarks[cum++] = i; 647 648 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 649 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 650 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 651 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 652 } 653 654 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 655 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 656 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 657 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 658 659 /* Zero rows of lGt corresponding to identified corners 660 and interior nodal dofs */ 661 cum = 0; 662 for (i=0;i<nv;i++) { 663 if (sfvleaves[i]) { 664 vmarks[cum++] = i; 665 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 666 } 667 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 668 } 669 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 670 if (print) { 671 IS tbz; 672 673 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 674 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 675 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 676 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 677 } 678 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 679 ierr = PetscFree(vmarks);CHKERRQ(ierr); 680 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 681 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 682 683 /* Recompute G */ 684 ierr = MatDestroy(&lG);CHKERRQ(ierr); 685 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 686 if (print) { 687 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 688 ierr = MatView(lG,NULL);CHKERRQ(ierr); 689 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 690 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 691 } 692 693 /* Get primal dofs (if any) */ 694 cum = 0; 695 for (i=0;i<ne;i++) { 696 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 697 } 698 if (fl2g) { 699 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 700 } 701 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 702 if (print) { 703 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 704 ierr = ISView(primals,NULL);CHKERRQ(ierr); 705 } 706 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 707 /* TODO: what if the user passed in some of them ? */ 708 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 709 ierr = ISDestroy(&primals);CHKERRQ(ierr); 710 711 /* Compute edge connectivity */ 712 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 713 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 714 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 715 if (fl2g) { 716 PetscBT btf; 717 PetscInt *iia,*jja,*iiu,*jju; 718 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 719 720 /* create CSR for all local dofs */ 721 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 722 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 723 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %D. Should be %D",pcbddc->mat_graph->nvtxs_csr,n); 724 iiu = pcbddc->mat_graph->xadj; 725 jju = pcbddc->mat_graph->adjncy; 726 } else if (pcbddc->use_local_adj) { 727 rest = PETSC_TRUE; 728 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 729 } else { 730 free = PETSC_TRUE; 731 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 732 iiu[0] = 0; 733 for (i=0;i<n;i++) { 734 iiu[i+1] = i+1; 735 jju[i] = -1; 736 } 737 } 738 739 /* import sizes of CSR */ 740 iia[0] = 0; 741 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 742 743 /* overwrite entries corresponding to the Nedelec field */ 744 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 745 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 746 for (i=0;i<ne;i++) { 747 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 748 iia[idxs[i]+1] = ii[i+1]-ii[i]; 749 } 750 751 /* iia in CSR */ 752 for (i=0;i<n;i++) iia[i+1] += iia[i]; 753 754 /* jja in CSR */ 755 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 756 for (i=0;i<n;i++) 757 if (!PetscBTLookup(btf,i)) 758 for (j=0;j<iiu[i+1]-iiu[i];j++) 759 jja[iia[i]+j] = jju[iiu[i]+j]; 760 761 /* map edge dofs connectivity */ 762 if (jj) { 763 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 764 for (i=0;i<ne;i++) { 765 PetscInt e = idxs[i]; 766 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 767 } 768 } 769 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 770 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 771 if (rest) { 772 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 773 } 774 if (free) { 775 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 776 } 777 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 778 } else { 779 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 780 } 781 782 /* Analyze interface for edge dofs */ 783 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 784 pcbddc->mat_graph->twodim = PETSC_FALSE; 785 786 /* Get coarse edges in the edge space */ 787 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 788 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 789 790 if (fl2g) { 791 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 792 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 793 for (i=0;i<nee;i++) { 794 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 795 } 796 } else { 797 eedges = alleedges; 798 primals = allprimals; 799 } 800 801 /* Mark fine edge dofs with their coarse edge id */ 802 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 803 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 804 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 805 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 806 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 807 if (print) { 808 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 809 ierr = ISView(primals,NULL);CHKERRQ(ierr); 810 } 811 812 maxsize = 0; 813 for (i=0;i<nee;i++) { 814 PetscInt size,mark = i+1; 815 816 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 817 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 818 for (j=0;j<size;j++) marks[idxs[j]] = mark; 819 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 820 maxsize = PetscMax(maxsize,size); 821 } 822 823 /* Find coarse edge endpoints */ 824 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 825 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 826 for (i=0;i<nee;i++) { 827 PetscInt mark = i+1,size; 828 829 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 830 if (!size && nedfieldlocal) continue; 831 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 832 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 833 if (print) { 834 ierr = PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %D\n",i);CHKERRQ(ierr); 835 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 836 } 837 for (j=0;j<size;j++) { 838 PetscInt k, ee = idxs[j]; 839 if (print) PetscPrintf(PETSC_COMM_SELF," idx %D\n",ee); 840 for (k=ii[ee];k<ii[ee+1];k++) { 841 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %D\n",jj[k]); 842 if (PetscBTLookup(btv,jj[k])) { 843 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %D\n",jj[k]); 844 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 845 PetscInt k2; 846 PetscBool corner = PETSC_FALSE; 847 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 848 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %D: mark %D (ref mark %D), boundary %D\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 849 /* it's a corner if either is connected with an edge dof belonging to a different cc or 850 if the edge dof lie on the natural part of the boundary */ 851 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 852 corner = PETSC_TRUE; 853 break; 854 } 855 } 856 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 857 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %D\n",jj[k]); 858 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 859 } else { 860 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 861 } 862 } 863 } 864 } 865 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 866 } 867 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 868 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 869 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 870 871 /* Reset marked primal dofs */ 872 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 873 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 874 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 875 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 876 877 /* Now use the initial lG */ 878 ierr = MatDestroy(&lG);CHKERRQ(ierr); 879 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 880 lG = lGinit; 881 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 882 883 /* Compute extended cols indices */ 884 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 885 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 886 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 887 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 888 i *= maxsize; 889 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 890 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 891 eerr = PETSC_FALSE; 892 for (i=0;i<nee;i++) { 893 PetscInt size,found = 0; 894 895 cum = 0; 896 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 897 if (!size && nedfieldlocal) continue; 898 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 899 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 900 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 901 for (j=0;j<size;j++) { 902 PetscInt k,ee = idxs[j]; 903 for (k=ii[ee];k<ii[ee+1];k++) { 904 PetscInt vv = jj[k]; 905 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 906 else if (!PetscBTLookupSet(btvc,vv)) found++; 907 } 908 } 909 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 910 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 911 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 912 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 913 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 914 /* it may happen that endpoints are not defined at this point 915 if it is the case, mark this edge for a second pass */ 916 if (cum != size -1 || found != 2) { 917 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 918 if (print) { 919 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 920 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 921 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 922 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 923 } 924 eerr = PETSC_TRUE; 925 } 926 } 927 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 928 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 929 if (done) { 930 PetscInt *newprimals; 931 932 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 933 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 934 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 935 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 936 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 937 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 938 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %D)\n",eerr); 939 for (i=0;i<nee;i++) { 940 PetscBool has_candidates = PETSC_FALSE; 941 if (PetscBTLookup(bter,i)) { 942 PetscInt size,mark = i+1; 943 944 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 945 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 946 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 947 for (j=0;j<size;j++) { 948 PetscInt k,ee = idxs[j]; 949 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %D [%D %D)\n",ee,ii[ee],ii[ee+1]); 950 for (k=ii[ee];k<ii[ee+1];k++) { 951 /* set all candidates located on the edge as corners */ 952 if (PetscBTLookup(btvcand,jj[k])) { 953 PetscInt k2,vv = jj[k]; 954 has_candidates = PETSC_TRUE; 955 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %D\n",vv); 956 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 957 /* set all edge dofs connected to candidate as primals */ 958 for (k2=iit[vv];k2<iit[vv+1];k2++) { 959 if (marks[jjt[k2]] == mark) { 960 PetscInt k3,ee2 = jjt[k2]; 961 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %D\n",ee2); 962 newprimals[cum++] = ee2; 963 /* finally set the new corners */ 964 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 965 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %D\n",jj[k3]); 966 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 967 } 968 } 969 } 970 } else { 971 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %D\n",jj[k]); 972 } 973 } 974 } 975 if (!has_candidates) { /* circular edge */ 976 PetscInt k, ee = idxs[0],*tmarks; 977 978 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 979 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %D\n",i); 980 for (k=ii[ee];k<ii[ee+1];k++) { 981 PetscInt k2; 982 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %D\n",jj[k]); 983 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 984 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 985 } 986 for (j=0;j<size;j++) { 987 if (tmarks[idxs[j]] > 1) { 988 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %D\n",idxs[j]); 989 newprimals[cum++] = idxs[j]; 990 } 991 } 992 ierr = PetscFree(tmarks);CHKERRQ(ierr); 993 } 994 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 995 } 996 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 997 } 998 ierr = PetscFree(extcols);CHKERRQ(ierr); 999 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1000 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1001 if (fl2g) { 1002 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1003 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1004 for (i=0;i<nee;i++) { 1005 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1006 } 1007 ierr = PetscFree(eedges);CHKERRQ(ierr); 1008 } 1009 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1010 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1011 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1012 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1013 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1014 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1015 pcbddc->mat_graph->twodim = PETSC_FALSE; 1016 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1017 if (fl2g) { 1018 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1019 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1020 for (i=0;i<nee;i++) { 1021 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1022 } 1023 } else { 1024 eedges = alleedges; 1025 primals = allprimals; 1026 } 1027 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1028 1029 /* Mark again */ 1030 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1031 for (i=0;i<nee;i++) { 1032 PetscInt size,mark = i+1; 1033 1034 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1035 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1036 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1037 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1038 } 1039 if (print) { 1040 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1041 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1042 } 1043 1044 /* Recompute extended cols */ 1045 eerr = PETSC_FALSE; 1046 for (i=0;i<nee;i++) { 1047 PetscInt size; 1048 1049 cum = 0; 1050 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1051 if (!size && nedfieldlocal) continue; 1052 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1053 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1054 for (j=0;j<size;j++) { 1055 PetscInt k,ee = idxs[j]; 1056 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1057 } 1058 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1059 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1060 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1061 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1062 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1063 if (cum != size -1) { 1064 if (print) { 1065 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1066 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1067 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1068 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1069 } 1070 eerr = PETSC_TRUE; 1071 } 1072 } 1073 } 1074 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1075 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1076 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1077 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1078 /* an error should not occur at this point */ 1079 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1080 1081 /* Check the number of endpoints */ 1082 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1083 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1084 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1085 for (i=0;i<nee;i++) { 1086 PetscInt size, found = 0, gc[2]; 1087 1088 /* init with defaults */ 1089 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 if (!size && nedfieldlocal) continue; 1092 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %D",i); 1093 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1095 for (j=0;j<size;j++) { 1096 PetscInt k,ee = idxs[j]; 1097 for (k=ii[ee];k<ii[ee+1];k++) { 1098 PetscInt vv = jj[k]; 1099 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1100 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %D",i); 1101 corners[i*2+found++] = vv; 1102 } 1103 } 1104 } 1105 if (found != 2) { 1106 PetscInt e; 1107 if (fl2g) { 1108 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1109 } else { 1110 e = idxs[0]; 1111 } 1112 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %D corners for edge %D (astart %D, estart %D)",found,i,e,idxs[0]); 1113 } 1114 1115 /* get primal dof index on this coarse edge */ 1116 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1117 if (gc[0] > gc[1]) { 1118 PetscInt swap = corners[2*i]; 1119 corners[2*i] = corners[2*i+1]; 1120 corners[2*i+1] = swap; 1121 } 1122 cedges[i] = idxs[size-1]; 1123 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1124 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %D: ce %D, corners (%D,%D)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1125 } 1126 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1127 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1128 1129 #if defined(PETSC_USE_DEBUG) 1130 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1131 not interfere with neighbouring coarse edges */ 1132 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1133 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1134 for (i=0;i<nv;i++) { 1135 PetscInt emax = 0,eemax = 0; 1136 1137 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1138 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1139 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1140 for (j=1;j<nee+1;j++) { 1141 if (emax < emarks[j]) { 1142 emax = emarks[j]; 1143 eemax = j; 1144 } 1145 } 1146 /* not relevant for edges */ 1147 if (!eemax) continue; 1148 1149 for (j=ii[i];j<ii[i+1];j++) { 1150 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1151 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %D and %D) connected through the %D nodal dof at edge dof %D",marks[jj[j]]-1,eemax,i,jj[j]); 1152 } 1153 } 1154 } 1155 ierr = PetscFree(emarks);CHKERRQ(ierr); 1156 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1157 #endif 1158 1159 /* Compute extended rows indices for edge blocks of the change of basis */ 1160 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1161 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1162 extmem *= maxsize; 1163 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1164 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1165 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1166 for (i=0;i<nv;i++) { 1167 PetscInt mark = 0,size,start; 1168 1169 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1170 for (j=ii[i];j<ii[i+1];j++) 1171 if (marks[jj[j]] && !mark) 1172 mark = marks[jj[j]]; 1173 1174 /* not relevant */ 1175 if (!mark) continue; 1176 1177 /* import extended row */ 1178 mark--; 1179 start = mark*extmem+extrowcum[mark]; 1180 size = ii[i+1]-ii[i]; 1181 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %D > %D",extrowcum[mark] + size,extmem); 1182 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1183 extrowcum[mark] += size; 1184 } 1185 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1186 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1187 ierr = PetscFree(marks);CHKERRQ(ierr); 1188 1189 /* Compress extrows */ 1190 cum = 0; 1191 for (i=0;i<nee;i++) { 1192 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1193 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1194 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1195 cum = PetscMax(cum,size); 1196 } 1197 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1198 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1199 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1200 1201 /* Workspace for lapack inner calls and VecSetValues */ 1202 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1203 1204 /* Create change of basis matrix (preallocation can be improved) */ 1205 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1206 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1207 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1208 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1209 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1210 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1211 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1212 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1213 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1214 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1215 1216 /* Defaults to identity */ 1217 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1218 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1219 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1220 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1221 1222 /* Create discrete gradient for the coarser level if needed */ 1223 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1224 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1225 if (pcbddc->current_level < pcbddc->max_levels) { 1226 ISLocalToGlobalMapping cel2g,cvl2g; 1227 IS wis,gwis; 1228 PetscInt cnv,cne; 1229 1230 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1231 if (fl2g) { 1232 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1233 } else { 1234 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1235 pcbddc->nedclocal = wis; 1236 } 1237 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1238 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1239 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1240 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1241 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1242 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1243 1244 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1245 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1246 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1247 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1248 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1249 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1250 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1251 1252 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1253 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1254 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1255 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1256 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1257 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1258 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1259 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1260 } 1261 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1262 1263 #if defined(PRINT_GDET) 1264 inc = 0; 1265 lev = pcbddc->current_level; 1266 #endif 1267 1268 /* Insert values in the change of basis matrix */ 1269 for (i=0;i<nee;i++) { 1270 Mat Gins = NULL, GKins = NULL; 1271 IS cornersis = NULL; 1272 PetscScalar cvals[2]; 1273 1274 if (pcbddc->nedcG) { 1275 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1276 } 1277 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1278 if (Gins && GKins) { 1279 PetscScalar *data; 1280 const PetscInt *rows,*cols; 1281 PetscInt nrh,nch,nrc,ncc; 1282 1283 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1284 /* H1 */ 1285 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1286 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1287 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1288 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1289 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1290 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1291 /* complement */ 1292 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1293 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %D",i); 1294 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %D and Gins %D does not match %D for coarse edge %D",ncc,nch,nrc,i); 1295 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %D with ncc %D",i,ncc); 1296 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1297 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1298 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1299 1300 /* coarse discrete gradient */ 1301 if (pcbddc->nedcG) { 1302 PetscInt cols[2]; 1303 1304 cols[0] = 2*i; 1305 cols[1] = 2*i+1; 1306 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1307 } 1308 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1309 } 1310 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1311 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1312 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1313 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1314 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1315 } 1316 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1317 1318 /* Start assembling */ 1319 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1320 if (pcbddc->nedcG) { 1321 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1322 } 1323 1324 /* Free */ 1325 if (fl2g) { 1326 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1327 for (i=0;i<nee;i++) { 1328 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1329 } 1330 ierr = PetscFree(eedges);CHKERRQ(ierr); 1331 } 1332 1333 /* hack mat_graph with primal dofs on the coarse edges */ 1334 { 1335 PCBDDCGraph graph = pcbddc->mat_graph; 1336 PetscInt *oqueue = graph->queue; 1337 PetscInt *ocptr = graph->cptr; 1338 PetscInt ncc,*idxs; 1339 1340 /* find first primal edge */ 1341 if (pcbddc->nedclocal) { 1342 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1343 } else { 1344 if (fl2g) { 1345 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1346 } 1347 idxs = cedges; 1348 } 1349 cum = 0; 1350 while (cum < nee && cedges[cum] < 0) cum++; 1351 1352 /* adapt connected components */ 1353 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1354 graph->cptr[0] = 0; 1355 for (i=0,ncc=0;i<graph->ncc;i++) { 1356 PetscInt lc = ocptr[i+1]-ocptr[i]; 1357 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1358 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1359 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1360 ncc++; 1361 lc--; 1362 cum++; 1363 while (cum < nee && cedges[cum] < 0) cum++; 1364 } 1365 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1366 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1367 ncc++; 1368 } 1369 graph->ncc = ncc; 1370 if (pcbddc->nedclocal) { 1371 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1372 } 1373 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1374 } 1375 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1376 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1377 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1378 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1379 1380 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1381 ierr = PetscFree(extrow);CHKERRQ(ierr); 1382 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1383 ierr = PetscFree(corners);CHKERRQ(ierr); 1384 ierr = PetscFree(cedges);CHKERRQ(ierr); 1385 ierr = PetscFree(extrows);CHKERRQ(ierr); 1386 ierr = PetscFree(extcols);CHKERRQ(ierr); 1387 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1388 1389 /* Complete assembling */ 1390 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1391 if (pcbddc->nedcG) { 1392 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1393 #if 0 1394 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1395 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1396 #endif 1397 } 1398 1399 /* set change of basis */ 1400 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1401 ierr = MatDestroy(&T);CHKERRQ(ierr); 1402 1403 PetscFunctionReturn(0); 1404 } 1405 1406 /* the near-null space of BDDC carries information on quadrature weights, 1407 and these can be collinear -> so cheat with MatNullSpaceCreate 1408 and create a suitable set of basis vectors first */ 1409 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1410 { 1411 PetscErrorCode ierr; 1412 PetscInt i; 1413 1414 PetscFunctionBegin; 1415 for (i=0;i<nvecs;i++) { 1416 PetscInt first,last; 1417 1418 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1419 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1420 if (i>=first && i < last) { 1421 PetscScalar *data; 1422 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1423 if (!has_const) { 1424 data[i-first] = 1.; 1425 } else { 1426 data[2*i-first] = 1./PetscSqrtReal(2.); 1427 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1428 } 1429 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1430 } 1431 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1432 } 1433 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1434 for (i=0;i<nvecs;i++) { /* reset vectors */ 1435 PetscInt first,last; 1436 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1437 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1438 if (i>=first && i < last) { 1439 PetscScalar *data; 1440 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1441 if (!has_const) { 1442 data[i-first] = 0.; 1443 } else { 1444 data[2*i-first] = 0.; 1445 data[2*i-first+1] = 0.; 1446 } 1447 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1448 } 1449 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1450 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1451 } 1452 PetscFunctionReturn(0); 1453 } 1454 1455 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1456 { 1457 Mat loc_divudotp; 1458 Vec p,v,vins,quad_vec,*quad_vecs; 1459 ISLocalToGlobalMapping map; 1460 PetscScalar *vals; 1461 const PetscScalar *array; 1462 PetscInt i,maxneighs,maxsize; 1463 PetscInt n_neigh,*neigh,*n_shared,**shared; 1464 PetscMPIInt rank; 1465 PetscErrorCode ierr; 1466 1467 PetscFunctionBegin; 1468 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1469 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1470 if (!maxneighs) { 1471 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1472 *nnsp = NULL; 1473 PetscFunctionReturn(0); 1474 } 1475 maxsize = 0; 1476 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1477 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1478 /* create vectors to hold quadrature weights */ 1479 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1480 if (!transpose) { 1481 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1482 } else { 1483 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1484 } 1485 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1486 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1487 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1488 for (i=0;i<maxneighs;i++) { 1489 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1490 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1491 } 1492 1493 /* compute local quad vec */ 1494 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1495 if (!transpose) { 1496 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1497 } else { 1498 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1499 } 1500 ierr = VecSet(p,1.);CHKERRQ(ierr); 1501 if (!transpose) { 1502 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1503 } else { 1504 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1505 } 1506 if (vl2l) { 1507 Mat lA; 1508 VecScatter sc; 1509 1510 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1511 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1512 ierr = VecScatterCreateWithData(v,NULL,vins,vl2l,&sc);CHKERRQ(ierr); 1513 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1514 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1515 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1516 } else { 1517 vins = v; 1518 } 1519 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1520 ierr = VecDestroy(&p);CHKERRQ(ierr); 1521 1522 /* insert in global quadrature vecs */ 1523 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1524 for (i=0;i<n_neigh;i++) { 1525 const PetscInt *idxs; 1526 PetscInt idx,nn,j; 1527 1528 idxs = shared[i]; 1529 nn = n_shared[i]; 1530 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1531 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1532 idx = -(idx+1); 1533 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1534 } 1535 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1536 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1537 if (vl2l) { 1538 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1539 } 1540 ierr = VecDestroy(&v);CHKERRQ(ierr); 1541 ierr = PetscFree(vals);CHKERRQ(ierr); 1542 1543 /* assemble near null space */ 1544 for (i=0;i<maxneighs;i++) { 1545 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1546 } 1547 for (i=0;i<maxneighs;i++) { 1548 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1549 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1550 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1551 } 1552 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1553 PetscFunctionReturn(0); 1554 } 1555 1556 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1557 { 1558 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1559 PetscErrorCode ierr; 1560 1561 PetscFunctionBegin; 1562 if (primalv) { 1563 if (pcbddc->user_primal_vertices_local) { 1564 IS list[2], newp; 1565 1566 list[0] = primalv; 1567 list[1] = pcbddc->user_primal_vertices_local; 1568 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1569 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1570 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1571 pcbddc->user_primal_vertices_local = newp; 1572 } else { 1573 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1574 } 1575 } 1576 PetscFunctionReturn(0); 1577 } 1578 1579 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1580 { 1581 PetscInt f, *comp = (PetscInt *)ctx; 1582 1583 PetscFunctionBegin; 1584 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1585 PetscFunctionReturn(0); 1586 } 1587 1588 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1589 { 1590 PetscErrorCode ierr; 1591 Vec local,global; 1592 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1593 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1594 PetscBool monolithic = PETSC_FALSE; 1595 1596 PetscFunctionBegin; 1597 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1598 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1599 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1600 /* need to convert from global to local topology information and remove references to information in global ordering */ 1601 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1602 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1603 if (monolithic) { /* just get block size to properly compute vertices */ 1604 if (pcbddc->vertex_size == 1) { 1605 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1606 } 1607 goto boundary; 1608 } 1609 1610 if (pcbddc->user_provided_isfordofs) { 1611 if (pcbddc->n_ISForDofs) { 1612 PetscInt i; 1613 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1614 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1615 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1616 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1617 } 1618 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1619 pcbddc->n_ISForDofs = 0; 1620 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1621 } 1622 } else { 1623 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1624 DM dm; 1625 1626 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1627 if (!dm) { 1628 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1629 } 1630 if (dm) { 1631 IS *fields; 1632 PetscInt nf,i; 1633 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1634 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1635 for (i=0;i<nf;i++) { 1636 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1637 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1638 } 1639 ierr = PetscFree(fields);CHKERRQ(ierr); 1640 pcbddc->n_ISForDofsLocal = nf; 1641 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1642 PetscContainer c; 1643 1644 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1645 if (c) { 1646 MatISLocalFields lf; 1647 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1648 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1649 } else { /* fallback, create the default fields if bs > 1 */ 1650 PetscInt i, n = matis->A->rmap->n; 1651 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1652 if (i > 1) { 1653 pcbddc->n_ISForDofsLocal = i; 1654 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1655 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1656 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1657 } 1658 } 1659 } 1660 } 1661 } else { 1662 PetscInt i; 1663 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1664 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1665 } 1666 } 1667 } 1668 1669 boundary: 1670 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1671 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1672 } else if (pcbddc->DirichletBoundariesLocal) { 1673 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1674 } 1675 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1676 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1677 } else if (pcbddc->NeumannBoundariesLocal) { 1678 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1679 } 1680 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1681 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1682 } 1683 ierr = VecDestroy(&global);CHKERRQ(ierr); 1684 ierr = VecDestroy(&local);CHKERRQ(ierr); 1685 /* detect local disconnected subdomains if requested (use matis->A) */ 1686 if (pcbddc->detect_disconnected) { 1687 IS primalv = NULL; 1688 PetscInt i; 1689 PetscBool filter = pcbddc->detect_disconnected_filter; 1690 1691 for (i=0;i<pcbddc->n_local_subs;i++) { 1692 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1693 } 1694 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1695 ierr = PCBDDCDetectDisconnectedComponents(pc,filter,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1696 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1697 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1698 } 1699 /* early stage corner detection */ 1700 { 1701 DM dm; 1702 1703 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1704 if (dm) { 1705 PetscBool isda; 1706 1707 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1708 if (isda) { 1709 ISLocalToGlobalMapping l2l; 1710 IS corners; 1711 Mat lA; 1712 1713 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1714 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1715 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1716 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1717 if (l2l && corners) { 1718 const PetscInt *idx; 1719 PetscInt dof,bs,*idxout,n; 1720 1721 ierr = DMDAGetInfo(dm,NULL,NULL,NULL,NULL,NULL,NULL,NULL,&dof,NULL,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 1722 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1723 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1724 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1725 if (bs == dof) { 1726 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1727 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1728 } else { /* the original DMDA local-to-local map have been modified */ 1729 PetscInt i,d; 1730 1731 ierr = PetscMalloc1(dof*n,&idxout);CHKERRQ(ierr); 1732 for (i=0;i<n;i++) for (d=0;d<dof;d++) idxout[dof*i+d] = dof*idx[i]+d; 1733 ierr = ISLocalToGlobalMappingApply(l2l,dof*n,idxout,idxout);CHKERRQ(ierr); 1734 1735 bs = 1; 1736 n *= dof; 1737 } 1738 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1739 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1740 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1741 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1742 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1743 pcbddc->corner_selected = PETSC_TRUE; 1744 } else if (corners) { /* not from DMDA */ 1745 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1746 } 1747 } 1748 } 1749 } 1750 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1751 DM dm; 1752 1753 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1754 if (!dm) { 1755 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1756 } 1757 if (dm) { 1758 Vec vcoords; 1759 PetscSection section; 1760 PetscReal *coords; 1761 PetscInt d,cdim,nl,nf,**ctxs; 1762 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1763 1764 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1765 ierr = DMGetSection(dm,§ion);CHKERRQ(ierr); 1766 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1767 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1768 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1769 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1770 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1771 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1772 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1773 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1774 for (d=0;d<cdim;d++) { 1775 PetscInt i; 1776 const PetscScalar *v; 1777 1778 for (i=0;i<nf;i++) ctxs[i][0] = d; 1779 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1780 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1781 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1782 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1783 } 1784 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1785 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1786 ierr = PetscFree(coords);CHKERRQ(ierr); 1787 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1788 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1789 } 1790 } 1791 PetscFunctionReturn(0); 1792 } 1793 1794 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1795 { 1796 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1797 PetscErrorCode ierr; 1798 IS nis; 1799 const PetscInt *idxs; 1800 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1801 PetscBool *ld; 1802 1803 PetscFunctionBegin; 1804 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1805 if (mop == MPI_LAND) { 1806 /* init rootdata with true */ 1807 ld = (PetscBool*) matis->sf_rootdata; 1808 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1809 } else { 1810 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1811 } 1812 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1813 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1814 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1815 ld = (PetscBool*) matis->sf_leafdata; 1816 for (i=0;i<nd;i++) 1817 if (-1 < idxs[i] && idxs[i] < n) 1818 ld[idxs[i]] = PETSC_TRUE; 1819 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1820 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1821 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1822 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1823 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1824 if (mop == MPI_LAND) { 1825 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1826 } else { 1827 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1828 } 1829 for (i=0,nnd=0;i<n;i++) 1830 if (ld[i]) 1831 nidxs[nnd++] = i; 1832 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1833 ierr = ISDestroy(is);CHKERRQ(ierr); 1834 *is = nis; 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1839 { 1840 PC_IS *pcis = (PC_IS*)(pc->data); 1841 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1842 PetscErrorCode ierr; 1843 1844 PetscFunctionBegin; 1845 if (!pcbddc->benign_have_null) { 1846 PetscFunctionReturn(0); 1847 } 1848 if (pcbddc->ChangeOfBasisMatrix) { 1849 Vec swap; 1850 1851 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1852 swap = pcbddc->work_change; 1853 pcbddc->work_change = r; 1854 r = swap; 1855 } 1856 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1857 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1858 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1859 ierr = VecSet(z,0.);CHKERRQ(ierr); 1860 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1861 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1862 if (pcbddc->ChangeOfBasisMatrix) { 1863 pcbddc->work_change = r; 1864 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1865 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1866 } 1867 PetscFunctionReturn(0); 1868 } 1869 1870 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1871 { 1872 PCBDDCBenignMatMult_ctx ctx; 1873 PetscErrorCode ierr; 1874 PetscBool apply_right,apply_left,reset_x; 1875 1876 PetscFunctionBegin; 1877 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1878 if (transpose) { 1879 apply_right = ctx->apply_left; 1880 apply_left = ctx->apply_right; 1881 } else { 1882 apply_right = ctx->apply_right; 1883 apply_left = ctx->apply_left; 1884 } 1885 reset_x = PETSC_FALSE; 1886 if (apply_right) { 1887 const PetscScalar *ax; 1888 PetscInt nl,i; 1889 1890 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1891 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1892 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1893 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1894 for (i=0;i<ctx->benign_n;i++) { 1895 PetscScalar sum,val; 1896 const PetscInt *idxs; 1897 PetscInt nz,j; 1898 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1899 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1900 sum = 0.; 1901 if (ctx->apply_p0) { 1902 val = ctx->work[idxs[nz-1]]; 1903 for (j=0;j<nz-1;j++) { 1904 sum += ctx->work[idxs[j]]; 1905 ctx->work[idxs[j]] += val; 1906 } 1907 } else { 1908 for (j=0;j<nz-1;j++) { 1909 sum += ctx->work[idxs[j]]; 1910 } 1911 } 1912 ctx->work[idxs[nz-1]] -= sum; 1913 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1914 } 1915 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1916 reset_x = PETSC_TRUE; 1917 } 1918 if (transpose) { 1919 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1920 } else { 1921 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1922 } 1923 if (reset_x) { 1924 ierr = VecResetArray(x);CHKERRQ(ierr); 1925 } 1926 if (apply_left) { 1927 PetscScalar *ay; 1928 PetscInt i; 1929 1930 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1931 for (i=0;i<ctx->benign_n;i++) { 1932 PetscScalar sum,val; 1933 const PetscInt *idxs; 1934 PetscInt nz,j; 1935 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1936 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1937 val = -ay[idxs[nz-1]]; 1938 if (ctx->apply_p0) { 1939 sum = 0.; 1940 for (j=0;j<nz-1;j++) { 1941 sum += ay[idxs[j]]; 1942 ay[idxs[j]] += val; 1943 } 1944 ay[idxs[nz-1]] += sum; 1945 } else { 1946 for (j=0;j<nz-1;j++) { 1947 ay[idxs[j]] += val; 1948 } 1949 ay[idxs[nz-1]] = 0.; 1950 } 1951 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1952 } 1953 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1954 } 1955 PetscFunctionReturn(0); 1956 } 1957 1958 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1959 { 1960 PetscErrorCode ierr; 1961 1962 PetscFunctionBegin; 1963 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1964 PetscFunctionReturn(0); 1965 } 1966 1967 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1968 { 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1973 PetscFunctionReturn(0); 1974 } 1975 1976 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1977 { 1978 PC_IS *pcis = (PC_IS*)pc->data; 1979 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1980 PCBDDCBenignMatMult_ctx ctx; 1981 PetscErrorCode ierr; 1982 1983 PetscFunctionBegin; 1984 if (!restore) { 1985 Mat A_IB,A_BI; 1986 PetscScalar *work; 1987 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1988 1989 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1990 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1991 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1992 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1993 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1994 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1995 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1996 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1997 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1998 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1999 ctx->apply_left = PETSC_TRUE; 2000 ctx->apply_right = PETSC_FALSE; 2001 ctx->apply_p0 = PETSC_FALSE; 2002 ctx->benign_n = pcbddc->benign_n; 2003 if (reuse) { 2004 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2005 ctx->free = PETSC_FALSE; 2006 } else { /* TODO: could be optimized for successive solves */ 2007 ISLocalToGlobalMapping N_to_D; 2008 PetscInt i; 2009 2010 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2011 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2012 for (i=0;i<pcbddc->benign_n;i++) { 2013 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2014 } 2015 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2016 ctx->free = PETSC_TRUE; 2017 } 2018 ctx->A = pcis->A_IB; 2019 ctx->work = work; 2020 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2021 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2022 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2023 pcis->A_IB = A_IB; 2024 2025 /* A_BI as A_IB^T */ 2026 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2027 pcbddc->benign_original_mat = pcis->A_BI; 2028 pcis->A_BI = A_BI; 2029 } else { 2030 if (!pcbddc->benign_original_mat) { 2031 PetscFunctionReturn(0); 2032 } 2033 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2034 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2035 pcis->A_IB = ctx->A; 2036 ctx->A = NULL; 2037 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2038 pcis->A_BI = pcbddc->benign_original_mat; 2039 pcbddc->benign_original_mat = NULL; 2040 if (ctx->free) { 2041 PetscInt i; 2042 for (i=0;i<ctx->benign_n;i++) { 2043 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2044 } 2045 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2046 } 2047 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2048 ierr = PetscFree(ctx);CHKERRQ(ierr); 2049 } 2050 PetscFunctionReturn(0); 2051 } 2052 2053 /* used just in bddc debug mode */ 2054 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2055 { 2056 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2057 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2058 Mat An; 2059 PetscErrorCode ierr; 2060 2061 PetscFunctionBegin; 2062 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2063 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2064 if (is1) { 2065 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2066 ierr = MatDestroy(&An);CHKERRQ(ierr); 2067 } else { 2068 *B = An; 2069 } 2070 PetscFunctionReturn(0); 2071 } 2072 2073 /* TODO: add reuse flag */ 2074 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2075 { 2076 Mat Bt; 2077 PetscScalar *a,*bdata; 2078 const PetscInt *ii,*ij; 2079 PetscInt m,n,i,nnz,*bii,*bij; 2080 PetscBool flg_row; 2081 PetscErrorCode ierr; 2082 2083 PetscFunctionBegin; 2084 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2085 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2086 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2087 nnz = n; 2088 for (i=0;i<ii[n];i++) { 2089 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2090 } 2091 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2092 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2093 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2094 nnz = 0; 2095 bii[0] = 0; 2096 for (i=0;i<n;i++) { 2097 PetscInt j; 2098 for (j=ii[i];j<ii[i+1];j++) { 2099 PetscScalar entry = a[j]; 2100 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2101 bij[nnz] = ij[j]; 2102 bdata[nnz] = entry; 2103 nnz++; 2104 } 2105 } 2106 bii[i+1] = nnz; 2107 } 2108 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2109 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2110 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2111 { 2112 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2113 b->free_a = PETSC_TRUE; 2114 b->free_ij = PETSC_TRUE; 2115 } 2116 if (*B == A) { 2117 ierr = MatDestroy(&A);CHKERRQ(ierr); 2118 } 2119 *B = Bt; 2120 PetscFunctionReturn(0); 2121 } 2122 2123 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscBool filter, PetscInt *ncc, IS* cc[], IS* primalv) 2124 { 2125 Mat B = NULL; 2126 DM dm; 2127 IS is_dummy,*cc_n; 2128 ISLocalToGlobalMapping l2gmap_dummy; 2129 PCBDDCGraph graph; 2130 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2131 PetscInt i,n; 2132 PetscInt *xadj,*adjncy; 2133 PetscBool isplex = PETSC_FALSE; 2134 PetscErrorCode ierr; 2135 2136 PetscFunctionBegin; 2137 if (ncc) *ncc = 0; 2138 if (cc) *cc = NULL; 2139 if (primalv) *primalv = NULL; 2140 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2141 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2142 if (!dm) { 2143 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2144 } 2145 if (dm) { 2146 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2147 } 2148 if (filter) isplex = PETSC_FALSE; 2149 2150 if (isplex) { /* this code has been modified from plexpartition.c */ 2151 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2152 PetscInt *adj = NULL; 2153 IS cellNumbering; 2154 const PetscInt *cellNum; 2155 PetscBool useCone, useClosure; 2156 PetscSection section; 2157 PetscSegBuffer adjBuffer; 2158 PetscSF sfPoint; 2159 PetscErrorCode ierr; 2160 2161 PetscFunctionBegin; 2162 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2163 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2164 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2165 /* Build adjacency graph via a section/segbuffer */ 2166 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2167 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2168 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2169 /* Always use FVM adjacency to create partitioner graph */ 2170 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2171 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2172 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2173 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2174 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2175 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2176 for (n = 0, p = pStart; p < pEnd; p++) { 2177 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2178 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2179 adjSize = PETSC_DETERMINE; 2180 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2181 for (a = 0; a < adjSize; ++a) { 2182 const PetscInt point = adj[a]; 2183 if (pStart <= point && point < pEnd) { 2184 PetscInt *PETSC_RESTRICT pBuf; 2185 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2186 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2187 *pBuf = point; 2188 } 2189 } 2190 n++; 2191 } 2192 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2193 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2194 /* Derive CSR graph from section/segbuffer */ 2195 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2196 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2197 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2198 for (idx = 0, p = pStart; p < pEnd; p++) { 2199 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2200 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2201 } 2202 xadj[n] = size; 2203 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2204 /* Clean up */ 2205 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2206 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2207 ierr = PetscFree(adj);CHKERRQ(ierr); 2208 graph->xadj = xadj; 2209 graph->adjncy = adjncy; 2210 } else { 2211 Mat A; 2212 PetscBool isseqaij, flg_row; 2213 2214 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2215 if (!A->rmap->N || !A->cmap->N) { 2216 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2217 PetscFunctionReturn(0); 2218 } 2219 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2220 if (!isseqaij && filter) { 2221 PetscBool isseqdense; 2222 2223 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2224 if (!isseqdense) { 2225 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2226 } else { /* TODO: rectangular case and LDA */ 2227 PetscScalar *array; 2228 PetscReal chop=1.e-6; 2229 2230 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2231 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2232 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2233 for (i=0;i<n;i++) { 2234 PetscInt j; 2235 for (j=i+1;j<n;j++) { 2236 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2237 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2238 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2239 } 2240 } 2241 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2242 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2243 } 2244 } else { 2245 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2246 B = A; 2247 } 2248 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2249 2250 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2251 if (filter) { 2252 PetscScalar *data; 2253 PetscInt j,cum; 2254 2255 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2256 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2257 cum = 0; 2258 for (i=0;i<n;i++) { 2259 PetscInt t; 2260 2261 for (j=xadj[i];j<xadj[i+1];j++) { 2262 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2263 continue; 2264 } 2265 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2266 } 2267 t = xadj_filtered[i]; 2268 xadj_filtered[i] = cum; 2269 cum += t; 2270 } 2271 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2272 graph->xadj = xadj_filtered; 2273 graph->adjncy = adjncy_filtered; 2274 } else { 2275 graph->xadj = xadj; 2276 graph->adjncy = adjncy; 2277 } 2278 } 2279 /* compute local connected components using PCBDDCGraph */ 2280 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2281 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2282 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2283 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2284 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2285 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2286 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2287 2288 /* partial clean up */ 2289 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2290 if (B) { 2291 PetscBool flg_row; 2292 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2293 ierr = MatDestroy(&B);CHKERRQ(ierr); 2294 } 2295 if (isplex) { 2296 ierr = PetscFree(xadj);CHKERRQ(ierr); 2297 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2298 } 2299 2300 /* get back data */ 2301 if (isplex) { 2302 if (ncc) *ncc = graph->ncc; 2303 if (cc || primalv) { 2304 Mat A; 2305 PetscBT btv,btvt; 2306 PetscSection subSection; 2307 PetscInt *ids,cum,cump,*cids,*pids; 2308 2309 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2310 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2311 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2312 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2313 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2314 2315 cids[0] = 0; 2316 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2317 PetscInt j; 2318 2319 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2320 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2321 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2322 2323 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2324 for (k = 0; k < 2*size; k += 2) { 2325 PetscInt s, p = closure[k], off, dof, cdof; 2326 2327 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2328 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2329 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2330 for (s = 0; s < dof-cdof; s++) { 2331 if (PetscBTLookupSet(btvt,off+s)) continue; 2332 if (!PetscBTLookup(btv,off+s)) { 2333 ids[cum++] = off+s; 2334 } else { /* cross-vertex */ 2335 pids[cump++] = off+s; 2336 } 2337 } 2338 } 2339 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2340 } 2341 cids[i+1] = cum; 2342 /* mark dofs as already assigned */ 2343 for (j = cids[i]; j < cids[i+1]; j++) { 2344 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2345 } 2346 } 2347 if (cc) { 2348 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2349 for (i = 0; i < graph->ncc; i++) { 2350 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2351 } 2352 *cc = cc_n; 2353 } 2354 if (primalv) { 2355 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2356 } 2357 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2358 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2359 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2360 } 2361 } else { 2362 if (ncc) *ncc = graph->ncc; 2363 if (cc) { 2364 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2365 for (i=0;i<graph->ncc;i++) { 2366 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2367 } 2368 *cc = cc_n; 2369 } 2370 } 2371 /* clean up graph */ 2372 graph->xadj = 0; 2373 graph->adjncy = 0; 2374 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2375 PetscFunctionReturn(0); 2376 } 2377 2378 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2379 { 2380 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2381 PC_IS* pcis = (PC_IS*)(pc->data); 2382 IS dirIS = NULL; 2383 PetscInt i; 2384 PetscErrorCode ierr; 2385 2386 PetscFunctionBegin; 2387 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2388 if (zerodiag) { 2389 Mat A; 2390 Vec vec3_N; 2391 PetscScalar *vals; 2392 const PetscInt *idxs; 2393 PetscInt nz,*count; 2394 2395 /* p0 */ 2396 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2397 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2398 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2399 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2400 for (i=0;i<nz;i++) vals[i] = 1.; 2401 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2402 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2403 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2404 /* v_I */ 2405 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2406 for (i=0;i<nz;i++) vals[i] = 0.; 2407 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2408 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2409 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2410 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2411 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2412 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2413 if (dirIS) { 2414 PetscInt n; 2415 2416 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2417 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2418 for (i=0;i<n;i++) vals[i] = 0.; 2419 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2421 } 2422 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2423 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2424 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2425 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2426 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2427 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2428 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2429 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2430 ierr = PetscFree(vals);CHKERRQ(ierr); 2431 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2432 2433 /* there should not be any pressure dofs lying on the interface */ 2434 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2435 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2436 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2437 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2438 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2439 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %D is an interface dof",idxs[i]); 2440 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2441 ierr = PetscFree(count);CHKERRQ(ierr); 2442 } 2443 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2444 2445 /* check PCBDDCBenignGetOrSetP0 */ 2446 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2447 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2448 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2449 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2450 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2451 for (i=0;i<pcbddc->benign_n;i++) { 2452 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2453 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %D instead of %g",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2454 } 2455 PetscFunctionReturn(0); 2456 } 2457 2458 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2459 { 2460 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2461 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2462 PetscInt nz,n; 2463 PetscInt *interior_dofs,n_interior_dofs,nneu; 2464 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2465 PetscErrorCode ierr; 2466 2467 PetscFunctionBegin; 2468 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2469 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2470 for (n=0;n<pcbddc->benign_n;n++) { 2471 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2472 } 2473 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2474 pcbddc->benign_n = 0; 2475 2476 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2477 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2478 Checks if all the pressure dofs in each subdomain have a zero diagonal 2479 If not, a change of basis on pressures is not needed 2480 since the local Schur complements are already SPD 2481 */ 2482 has_null_pressures = PETSC_TRUE; 2483 have_null = PETSC_TRUE; 2484 if (pcbddc->n_ISForDofsLocal) { 2485 IS iP = NULL; 2486 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2487 2488 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2489 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2490 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2491 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2492 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2493 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2494 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2495 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2496 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2497 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2498 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2499 if (iP) { 2500 IS newpressures; 2501 2502 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2503 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2504 pressures = newpressures; 2505 } 2506 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2507 if (!sorted) { 2508 ierr = ISSort(pressures);CHKERRQ(ierr); 2509 } 2510 } else { 2511 pressures = NULL; 2512 } 2513 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2514 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2515 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2516 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2517 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2518 if (!sorted) { 2519 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2520 } 2521 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2522 zerodiag_save = zerodiag; 2523 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2524 if (!nz) { 2525 if (n) have_null = PETSC_FALSE; 2526 has_null_pressures = PETSC_FALSE; 2527 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2528 } 2529 recompute_zerodiag = PETSC_FALSE; 2530 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2531 zerodiag_subs = NULL; 2532 pcbddc->benign_n = 0; 2533 n_interior_dofs = 0; 2534 interior_dofs = NULL; 2535 nneu = 0; 2536 if (pcbddc->NeumannBoundariesLocal) { 2537 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2538 } 2539 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2540 if (checkb) { /* need to compute interior nodes */ 2541 PetscInt n,i,j; 2542 PetscInt n_neigh,*neigh,*n_shared,**shared; 2543 PetscInt *iwork; 2544 2545 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2546 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2547 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2548 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2549 for (i=1;i<n_neigh;i++) 2550 for (j=0;j<n_shared[i];j++) 2551 iwork[shared[i][j]] += 1; 2552 for (i=0;i<n;i++) 2553 if (!iwork[i]) 2554 interior_dofs[n_interior_dofs++] = i; 2555 ierr = PetscFree(iwork);CHKERRQ(ierr); 2556 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2557 } 2558 if (has_null_pressures) { 2559 IS *subs; 2560 PetscInt nsubs,i,j,nl; 2561 const PetscInt *idxs; 2562 PetscScalar *array; 2563 Vec *work; 2564 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2565 2566 subs = pcbddc->local_subs; 2567 nsubs = pcbddc->n_local_subs; 2568 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2569 if (checkb) { 2570 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2571 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2572 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2573 /* work[0] = 1_p */ 2574 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2575 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2576 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2577 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2578 /* work[0] = 1_v */ 2579 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2580 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2581 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2582 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2583 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2584 } 2585 if (nsubs > 1) { 2586 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2587 for (i=0;i<nsubs;i++) { 2588 ISLocalToGlobalMapping l2g; 2589 IS t_zerodiag_subs; 2590 PetscInt nl; 2591 2592 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2593 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2594 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2595 if (nl) { 2596 PetscBool valid = PETSC_TRUE; 2597 2598 if (checkb) { 2599 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2600 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2601 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2602 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2603 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2604 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2605 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2606 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2607 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2608 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2609 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2610 for (j=0;j<n_interior_dofs;j++) { 2611 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2612 valid = PETSC_FALSE; 2613 break; 2614 } 2615 } 2616 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2617 } 2618 if (valid && nneu) { 2619 const PetscInt *idxs; 2620 PetscInt nzb; 2621 2622 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2623 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2624 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2625 if (nzb) valid = PETSC_FALSE; 2626 } 2627 if (valid && pressures) { 2628 IS t_pressure_subs; 2629 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2630 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2631 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2632 } 2633 if (valid) { 2634 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2635 pcbddc->benign_n++; 2636 } else { 2637 recompute_zerodiag = PETSC_TRUE; 2638 } 2639 } 2640 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2641 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2642 } 2643 } else { /* there's just one subdomain (or zero if they have not been detected */ 2644 PetscBool valid = PETSC_TRUE; 2645 2646 if (nneu) valid = PETSC_FALSE; 2647 if (valid && pressures) { 2648 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2649 } 2650 if (valid && checkb) { 2651 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2652 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2653 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2654 for (j=0;j<n_interior_dofs;j++) { 2655 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2656 valid = PETSC_FALSE; 2657 break; 2658 } 2659 } 2660 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2661 } 2662 if (valid) { 2663 pcbddc->benign_n = 1; 2664 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2665 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2666 zerodiag_subs[0] = zerodiag; 2667 } 2668 } 2669 if (checkb) { 2670 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2671 } 2672 } 2673 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2674 2675 if (!pcbddc->benign_n) { 2676 PetscInt n; 2677 2678 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2679 recompute_zerodiag = PETSC_FALSE; 2680 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2681 if (n) { 2682 has_null_pressures = PETSC_FALSE; 2683 have_null = PETSC_FALSE; 2684 } 2685 } 2686 2687 /* final check for null pressures */ 2688 if (zerodiag && pressures) { 2689 PetscInt nz,np; 2690 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2691 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2692 if (nz != np) have_null = PETSC_FALSE; 2693 } 2694 2695 if (recompute_zerodiag) { 2696 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2697 if (pcbddc->benign_n == 1) { 2698 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2699 zerodiag = zerodiag_subs[0]; 2700 } else { 2701 PetscInt i,nzn,*new_idxs; 2702 2703 nzn = 0; 2704 for (i=0;i<pcbddc->benign_n;i++) { 2705 PetscInt ns; 2706 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2707 nzn += ns; 2708 } 2709 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2710 nzn = 0; 2711 for (i=0;i<pcbddc->benign_n;i++) { 2712 PetscInt ns,*idxs; 2713 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2714 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2715 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2716 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2717 nzn += ns; 2718 } 2719 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2720 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2721 } 2722 have_null = PETSC_FALSE; 2723 } 2724 2725 /* Prepare matrix to compute no-net-flux */ 2726 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2727 Mat A,loc_divudotp; 2728 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2729 IS row,col,isused = NULL; 2730 PetscInt M,N,n,st,n_isused; 2731 2732 if (pressures) { 2733 isused = pressures; 2734 } else { 2735 isused = zerodiag_save; 2736 } 2737 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2738 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2739 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2740 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2741 n_isused = 0; 2742 if (isused) { 2743 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2744 } 2745 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2746 st = st-n_isused; 2747 if (n) { 2748 const PetscInt *gidxs; 2749 2750 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2751 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2752 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2753 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2754 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2755 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2756 } else { 2757 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2758 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2759 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2760 } 2761 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2762 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2764 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2765 ierr = ISDestroy(&row);CHKERRQ(ierr); 2766 ierr = ISDestroy(&col);CHKERRQ(ierr); 2767 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2768 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2769 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2770 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2771 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2772 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2773 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2774 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2775 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2776 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2777 } 2778 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2779 2780 /* change of basis and p0 dofs */ 2781 if (has_null_pressures) { 2782 IS zerodiagc; 2783 const PetscInt *idxs,*idxsc; 2784 PetscInt i,s,*nnz; 2785 2786 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2787 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2788 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2789 /* local change of basis for pressures */ 2790 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2791 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2792 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2793 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2794 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2795 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2796 for (i=0;i<pcbddc->benign_n;i++) { 2797 PetscInt nzs,j; 2798 2799 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2800 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2801 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2802 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2803 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2804 } 2805 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2806 ierr = MatSetOption(pcbddc->benign_change,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2807 ierr = PetscFree(nnz);CHKERRQ(ierr); 2808 /* set identity on velocities */ 2809 for (i=0;i<n-nz;i++) { 2810 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2811 } 2812 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2813 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2814 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2815 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2816 /* set change on pressures */ 2817 for (s=0;s<pcbddc->benign_n;s++) { 2818 PetscScalar *array; 2819 PetscInt nzs; 2820 2821 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2822 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2823 for (i=0;i<nzs-1;i++) { 2824 PetscScalar vals[2]; 2825 PetscInt cols[2]; 2826 2827 cols[0] = idxs[i]; 2828 cols[1] = idxs[nzs-1]; 2829 vals[0] = 1.; 2830 vals[1] = 1.; 2831 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2832 } 2833 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2834 for (i=0;i<nzs-1;i++) array[i] = -1.; 2835 array[nzs-1] = 1.; 2836 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2837 /* store local idxs for p0 */ 2838 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2839 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2840 ierr = PetscFree(array);CHKERRQ(ierr); 2841 } 2842 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2843 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2844 /* project if needed */ 2845 if (pcbddc->benign_change_explicit) { 2846 Mat M; 2847 2848 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2849 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2850 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2851 ierr = MatDestroy(&M);CHKERRQ(ierr); 2852 } 2853 /* store global idxs for p0 */ 2854 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2855 } 2856 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2857 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2858 2859 /* determines if the coarse solver will be singular or not */ 2860 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2861 /* determines if the problem has subdomains with 0 pressure block */ 2862 have_null = (PetscBool)(!!pcbddc->benign_n); 2863 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2864 *zerodiaglocal = zerodiag; 2865 PetscFunctionReturn(0); 2866 } 2867 2868 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2869 { 2870 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2871 PetscScalar *array; 2872 PetscErrorCode ierr; 2873 2874 PetscFunctionBegin; 2875 if (!pcbddc->benign_sf) { 2876 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2877 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2878 } 2879 if (get) { 2880 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2881 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2882 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2883 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2884 } else { 2885 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2886 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2887 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2888 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2889 } 2890 PetscFunctionReturn(0); 2891 } 2892 2893 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2894 { 2895 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2896 PetscErrorCode ierr; 2897 2898 PetscFunctionBegin; 2899 /* TODO: add error checking 2900 - avoid nested pop (or push) calls. 2901 - cannot push before pop. 2902 - cannot call this if pcbddc->local_mat is NULL 2903 */ 2904 if (!pcbddc->benign_n) { 2905 PetscFunctionReturn(0); 2906 } 2907 if (pop) { 2908 if (pcbddc->benign_change_explicit) { 2909 IS is_p0; 2910 MatReuse reuse; 2911 2912 /* extract B_0 */ 2913 reuse = MAT_INITIAL_MATRIX; 2914 if (pcbddc->benign_B0) { 2915 reuse = MAT_REUSE_MATRIX; 2916 } 2917 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2918 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2919 /* remove rows and cols from local problem */ 2920 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2921 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2922 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2923 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2924 } else { 2925 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2926 PetscScalar *vals; 2927 PetscInt i,n,*idxs_ins; 2928 2929 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2930 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2931 if (!pcbddc->benign_B0) { 2932 PetscInt *nnz; 2933 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2934 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2935 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2936 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2937 for (i=0;i<pcbddc->benign_n;i++) { 2938 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2939 nnz[i] = n - nnz[i]; 2940 } 2941 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2942 ierr = MatSetOption(pcbddc->benign_B0,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 2943 ierr = PetscFree(nnz);CHKERRQ(ierr); 2944 } 2945 2946 for (i=0;i<pcbddc->benign_n;i++) { 2947 PetscScalar *array; 2948 PetscInt *idxs,j,nz,cum; 2949 2950 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2951 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2952 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2953 for (j=0;j<nz;j++) vals[j] = 1.; 2954 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2955 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2956 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2957 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2958 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2959 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2960 cum = 0; 2961 for (j=0;j<n;j++) { 2962 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2963 vals[cum] = array[j]; 2964 idxs_ins[cum] = j; 2965 cum++; 2966 } 2967 } 2968 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2969 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2970 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2971 } 2972 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2973 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2974 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2975 } 2976 } else { /* push */ 2977 if (pcbddc->benign_change_explicit) { 2978 PetscInt i; 2979 2980 for (i=0;i<pcbddc->benign_n;i++) { 2981 PetscScalar *B0_vals; 2982 PetscInt *B0_cols,B0_ncol; 2983 2984 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2985 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2986 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2987 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2988 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2989 } 2990 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2991 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2992 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!"); 2993 } 2994 PetscFunctionReturn(0); 2995 } 2996 2997 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2998 { 2999 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3000 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3001 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3002 PetscBLASInt *B_iwork,*B_ifail; 3003 PetscScalar *work,lwork; 3004 PetscScalar *St,*S,*eigv; 3005 PetscScalar *Sarray,*Starray; 3006 PetscReal *eigs,thresh,lthresh,uthresh; 3007 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3008 PetscBool allocated_S_St; 3009 #if defined(PETSC_USE_COMPLEX) 3010 PetscReal *rwork; 3011 #endif 3012 PetscErrorCode ierr; 3013 3014 PetscFunctionBegin; 3015 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3016 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3017 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3018 ierr = PetscLogEventBegin(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3019 3020 if (pcbddc->dbg_flag) { 3021 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3022 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3023 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3024 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3025 } 3026 3027 if (pcbddc->dbg_flag) { 3028 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %D (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef);CHKERRQ(ierr); 3029 } 3030 3031 /* max size of subsets */ 3032 mss = 0; 3033 for (i=0;i<sub_schurs->n_subs;i++) { 3034 PetscInt subset_size; 3035 3036 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3037 mss = PetscMax(mss,subset_size); 3038 } 3039 3040 /* min/max and threshold */ 3041 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3042 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3043 nmax = PetscMax(nmin,nmax); 3044 allocated_S_St = PETSC_FALSE; 3045 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3046 allocated_S_St = PETSC_TRUE; 3047 } 3048 3049 /* allocate lapack workspace */ 3050 cum = cum2 = 0; 3051 maxneigs = 0; 3052 for (i=0;i<sub_schurs->n_subs;i++) { 3053 PetscInt n,subset_size; 3054 3055 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3056 n = PetscMin(subset_size,nmax); 3057 cum += subset_size; 3058 cum2 += subset_size*n; 3059 maxneigs = PetscMax(maxneigs,n); 3060 } 3061 if (mss) { 3062 if (sub_schurs->is_symmetric) { 3063 PetscBLASInt B_itype = 1; 3064 PetscBLASInt B_N = mss; 3065 PetscReal zero = 0.0; 3066 PetscReal eps = 0.0; /* dlamch? */ 3067 3068 B_lwork = -1; 3069 S = NULL; 3070 St = NULL; 3071 eigs = NULL; 3072 eigv = NULL; 3073 B_iwork = NULL; 3074 B_ifail = NULL; 3075 #if defined(PETSC_USE_COMPLEX) 3076 rwork = NULL; 3077 #endif 3078 thresh = 1.0; 3079 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3080 #if defined(PETSC_USE_COMPLEX) 3081 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3082 #else 3083 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3084 #endif 3085 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3086 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3087 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3088 } else { 3089 lwork = 0; 3090 } 3091 3092 nv = 0; 3093 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3094 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3095 } 3096 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3097 if (allocated_S_St) { 3098 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3099 } 3100 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3101 #if defined(PETSC_USE_COMPLEX) 3102 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3103 #endif 3104 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3105 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3106 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3107 nv+cum,&pcbddc->adaptive_constraints_idxs, 3108 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3109 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3110 3111 maxneigs = 0; 3112 cum = cumarray = 0; 3113 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3114 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3115 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3116 const PetscInt *idxs; 3117 3118 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3119 for (cum=0;cum<nv;cum++) { 3120 pcbddc->adaptive_constraints_n[cum] = 1; 3121 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3122 pcbddc->adaptive_constraints_data[cum] = 1.0; 3123 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3124 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3125 } 3126 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3127 } 3128 3129 if (mss) { /* multilevel */ 3130 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3131 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3132 } 3133 3134 lthresh = pcbddc->adaptive_threshold[0]; 3135 uthresh = pcbddc->adaptive_threshold[1]; 3136 for (i=0;i<sub_schurs->n_subs;i++) { 3137 const PetscInt *idxs; 3138 PetscReal upper,lower; 3139 PetscInt j,subset_size,eigs_start = 0; 3140 PetscBLASInt B_N; 3141 PetscBool same_data = PETSC_FALSE; 3142 PetscBool scal = PETSC_FALSE; 3143 3144 if (pcbddc->use_deluxe_scaling) { 3145 upper = PETSC_MAX_REAL; 3146 lower = uthresh; 3147 } else { 3148 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3149 upper = 1./uthresh; 3150 lower = 0.; 3151 } 3152 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3153 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3154 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3155 /* this is experimental: we assume the dofs have been properly grouped to have 3156 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3157 if (!sub_schurs->is_posdef) { 3158 Mat T; 3159 3160 for (j=0;j<subset_size;j++) { 3161 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3162 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3163 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3164 ierr = MatDestroy(&T);CHKERRQ(ierr); 3165 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3166 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3167 ierr = MatDestroy(&T);CHKERRQ(ierr); 3168 if (sub_schurs->change_primal_sub) { 3169 PetscInt nz,k; 3170 const PetscInt *idxs; 3171 3172 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3173 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3174 for (k=0;k<nz;k++) { 3175 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3176 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3177 } 3178 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3179 } 3180 scal = PETSC_TRUE; 3181 break; 3182 } 3183 } 3184 } 3185 3186 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3187 if (sub_schurs->is_symmetric) { 3188 PetscInt j,k; 3189 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3190 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3191 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3192 } 3193 for (j=0;j<subset_size;j++) { 3194 for (k=j;k<subset_size;k++) { 3195 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3196 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3197 } 3198 } 3199 } else { 3200 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3201 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3202 } 3203 } else { 3204 S = Sarray + cumarray; 3205 St = Starray + cumarray; 3206 } 3207 /* see if we can save some work */ 3208 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3209 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3210 } 3211 3212 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3213 B_neigs = 0; 3214 } else { 3215 if (sub_schurs->is_symmetric) { 3216 PetscBLASInt B_itype = 1; 3217 PetscBLASInt B_IL, B_IU; 3218 PetscReal eps = -1.0; /* dlamch? */ 3219 PetscInt nmin_s; 3220 PetscBool compute_range; 3221 3222 B_neigs = 0; 3223 compute_range = (PetscBool)!same_data; 3224 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3225 3226 if (pcbddc->dbg_flag) { 3227 PetscInt nc = 0; 3228 3229 if (sub_schurs->change_primal_sub) { 3230 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3231 } 3232 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %D/%D size %D count %D fid %D (range %d) (change %D).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3233 } 3234 3235 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3236 if (compute_range) { 3237 3238 /* ask for eigenvalues larger than thresh */ 3239 if (sub_schurs->is_posdef) { 3240 #if defined(PETSC_USE_COMPLEX) 3241 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3242 #else 3243 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3244 #endif 3245 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3246 } else { /* no theory so far, but it works nicely */ 3247 PetscInt recipe = 0,recipe_m = 1; 3248 PetscReal bb[2]; 3249 3250 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3251 switch (recipe) { 3252 case 0: 3253 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3254 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3255 #if defined(PETSC_USE_COMPLEX) 3256 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3257 #else 3258 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3259 #endif 3260 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3261 break; 3262 case 1: 3263 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3264 #if defined(PETSC_USE_COMPLEX) 3265 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3266 #else 3267 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3268 #endif 3269 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3270 if (!scal) { 3271 PetscBLASInt B_neigs2 = 0; 3272 3273 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3274 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3275 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3276 #if defined(PETSC_USE_COMPLEX) 3277 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3278 #else 3279 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3280 #endif 3281 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3282 B_neigs += B_neigs2; 3283 } 3284 break; 3285 case 2: 3286 if (scal) { 3287 bb[0] = PETSC_MIN_REAL; 3288 bb[1] = 0; 3289 #if defined(PETSC_USE_COMPLEX) 3290 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3291 #else 3292 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3293 #endif 3294 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3295 } else { 3296 PetscBLASInt B_neigs2 = 0; 3297 PetscBool import = PETSC_FALSE; 3298 3299 lthresh = PetscMax(lthresh,0.0); 3300 if (lthresh > 0.0) { 3301 bb[0] = PETSC_MIN_REAL; 3302 bb[1] = lthresh*lthresh; 3303 3304 import = PETSC_TRUE; 3305 #if defined(PETSC_USE_COMPLEX) 3306 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3307 #else 3308 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3309 #endif 3310 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3311 } 3312 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3313 bb[1] = PETSC_MAX_REAL; 3314 if (import) { 3315 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3316 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3317 } 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3320 #else 3321 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3322 #endif 3323 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3324 B_neigs += B_neigs2; 3325 } 3326 break; 3327 case 3: 3328 if (scal) { 3329 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3330 } else { 3331 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3332 } 3333 if (!scal) { 3334 bb[0] = uthresh; 3335 bb[1] = PETSC_MAX_REAL; 3336 #if defined(PETSC_USE_COMPLEX) 3337 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3338 #else 3339 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3340 #endif 3341 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3342 } 3343 if (recipe_m > 0 && B_N - B_neigs > 0) { 3344 PetscBLASInt B_neigs2 = 0; 3345 3346 B_IL = 1; 3347 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3348 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3349 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3350 #if defined(PETSC_USE_COMPLEX) 3351 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3352 #else 3353 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3354 #endif 3355 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3356 B_neigs += B_neigs2; 3357 } 3358 break; 3359 case 4: 3360 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3361 #if defined(PETSC_USE_COMPLEX) 3362 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3363 #else 3364 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3365 #endif 3366 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3367 { 3368 PetscBLASInt B_neigs2 = 0; 3369 3370 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3371 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3372 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3373 #if defined(PETSC_USE_COMPLEX) 3374 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3375 #else 3376 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3377 #endif 3378 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3379 B_neigs += B_neigs2; 3380 } 3381 break; 3382 case 5: /* same as before: first compute all eigenvalues, then filter */ 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","A","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3389 { 3390 PetscInt e,k,ne; 3391 for (e=0,ne=0;e<B_neigs;e++) { 3392 if (eigs[e] < lthresh || eigs[e] > uthresh) { 3393 for (k=0;k<B_N;k++) S[ne*B_N+k] = eigv[e*B_N+k]; 3394 eigs[ne] = eigs[e]; 3395 ne++; 3396 } 3397 } 3398 ierr = PetscMemcpy(eigv,S,B_N*ne*sizeof(PetscScalar));CHKERRQ(ierr); 3399 B_neigs = ne; 3400 } 3401 break; 3402 default: 3403 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3404 break; 3405 } 3406 } 3407 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3408 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3409 B_IL = 1; 3410 #if defined(PETSC_USE_COMPLEX) 3411 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3412 #else 3413 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3414 #endif 3415 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3416 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3417 PetscInt k; 3418 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3419 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3420 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3421 nmin = nmax; 3422 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3423 for (k=0;k<nmax;k++) { 3424 eigs[k] = 1./PETSC_SMALL; 3425 eigv[k*(subset_size+1)] = 1.0; 3426 } 3427 } 3428 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3429 if (B_ierr) { 3430 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3431 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3432 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3433 } 3434 3435 if (B_neigs > nmax) { 3436 if (pcbddc->dbg_flag) { 3437 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %D.\n",B_neigs,nmax);CHKERRQ(ierr); 3438 } 3439 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3440 B_neigs = nmax; 3441 } 3442 3443 nmin_s = PetscMin(nmin,B_N); 3444 if (B_neigs < nmin_s) { 3445 PetscBLASInt B_neigs2 = 0; 3446 3447 if (pcbddc->use_deluxe_scaling) { 3448 if (scal) { 3449 B_IU = nmin_s; 3450 B_IL = B_neigs + 1; 3451 } else { 3452 B_IL = B_N - nmin_s + 1; 3453 B_IU = B_N - B_neigs; 3454 } 3455 } else { 3456 B_IL = B_neigs + 1; 3457 B_IU = nmin_s; 3458 } 3459 if (pcbddc->dbg_flag) { 3460 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %D. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU);CHKERRQ(ierr); 3461 } 3462 if (sub_schurs->is_symmetric) { 3463 PetscInt j,k; 3464 for (j=0;j<subset_size;j++) { 3465 for (k=j;k<subset_size;k++) { 3466 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3467 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3468 } 3469 } 3470 } else { 3471 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3472 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3473 } 3474 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3475 #if defined(PETSC_USE_COMPLEX) 3476 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3477 #else 3478 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3479 #endif 3480 ierr = PetscLogFlops((4.0*subset_size*subset_size*subset_size)/3.0);CHKERRQ(ierr); 3481 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3482 B_neigs += B_neigs2; 3483 } 3484 if (B_ierr) { 3485 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3486 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3487 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3488 } 3489 if (pcbddc->dbg_flag) { 3490 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3491 for (j=0;j<B_neigs;j++) { 3492 if (eigs[j] == 0.0) { 3493 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3494 } else { 3495 if (pcbddc->use_deluxe_scaling) { 3496 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3497 } else { 3498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3499 } 3500 } 3501 } 3502 } 3503 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3504 } 3505 /* change the basis back to the original one */ 3506 if (sub_schurs->change) { 3507 Mat change,phi,phit; 3508 3509 if (pcbddc->dbg_flag > 2) { 3510 PetscInt ii; 3511 for (ii=0;ii<B_neigs;ii++) { 3512 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3513 for (j=0;j<B_N;j++) { 3514 #if defined(PETSC_USE_COMPLEX) 3515 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3516 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3517 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3518 #else 3519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3520 #endif 3521 } 3522 } 3523 } 3524 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3525 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3526 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3527 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3528 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3529 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3530 } 3531 maxneigs = PetscMax(B_neigs,maxneigs); 3532 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3533 if (B_neigs) { 3534 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3535 3536 if (pcbddc->dbg_flag > 1) { 3537 PetscInt ii; 3538 for (ii=0;ii<B_neigs;ii++) { 3539 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3540 for (j=0;j<B_N;j++) { 3541 #if defined(PETSC_USE_COMPLEX) 3542 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3543 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3545 #else 3546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3547 #endif 3548 } 3549 } 3550 } 3551 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3552 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3553 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3554 cum++; 3555 } 3556 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3557 /* shift for next computation */ 3558 cumarray += subset_size*subset_size; 3559 } 3560 if (pcbddc->dbg_flag) { 3561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3562 } 3563 3564 if (mss) { 3565 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3566 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3567 /* destroy matrices (junk) */ 3568 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3569 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3570 } 3571 if (allocated_S_St) { 3572 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3573 } 3574 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3575 #if defined(PETSC_USE_COMPLEX) 3576 ierr = PetscFree(rwork);CHKERRQ(ierr); 3577 #endif 3578 if (pcbddc->dbg_flag) { 3579 PetscInt maxneigs_r; 3580 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3581 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %D\n",maxneigs_r);CHKERRQ(ierr); 3582 } 3583 ierr = PetscLogEventEnd(PC_BDDC_AdaptiveSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3584 PetscFunctionReturn(0); 3585 } 3586 3587 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3588 { 3589 PetscScalar *coarse_submat_vals; 3590 PetscErrorCode ierr; 3591 3592 PetscFunctionBegin; 3593 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3594 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3595 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3596 3597 /* Setup local neumann solver ksp_R */ 3598 /* PCBDDCSetUpLocalScatters should be called first! */ 3599 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3600 3601 /* 3602 Setup local correction and local part of coarse basis. 3603 Gives back the dense local part of the coarse matrix in column major ordering 3604 */ 3605 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3606 3607 /* Compute total number of coarse nodes and setup coarse solver */ 3608 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3609 3610 /* free */ 3611 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3612 PetscFunctionReturn(0); 3613 } 3614 3615 PetscErrorCode PCBDDCResetCustomization(PC pc) 3616 { 3617 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3618 PetscErrorCode ierr; 3619 3620 PetscFunctionBegin; 3621 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3622 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3623 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3624 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3625 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3626 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3627 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3628 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3629 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3630 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3631 PetscFunctionReturn(0); 3632 } 3633 3634 PetscErrorCode PCBDDCResetTopography(PC pc) 3635 { 3636 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3637 PetscInt i; 3638 PetscErrorCode ierr; 3639 3640 PetscFunctionBegin; 3641 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3642 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3643 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3644 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3645 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3646 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3647 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3648 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3649 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3650 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3651 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3652 for (i=0;i<pcbddc->n_local_subs;i++) { 3653 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3654 } 3655 pcbddc->n_local_subs = 0; 3656 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3657 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3658 pcbddc->graphanalyzed = PETSC_FALSE; 3659 pcbddc->recompute_topography = PETSC_TRUE; 3660 pcbddc->corner_selected = PETSC_FALSE; 3661 PetscFunctionReturn(0); 3662 } 3663 3664 PetscErrorCode PCBDDCResetSolvers(PC pc) 3665 { 3666 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3667 PetscErrorCode ierr; 3668 3669 PetscFunctionBegin; 3670 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3671 if (pcbddc->coarse_phi_B) { 3672 PetscScalar *array; 3673 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3674 ierr = PetscFree(array);CHKERRQ(ierr); 3675 } 3676 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3677 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3678 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3679 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3680 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3681 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3682 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3683 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3684 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3685 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3686 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3687 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3688 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3689 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3690 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3691 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3692 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3693 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3694 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3695 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3696 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3697 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3698 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3699 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3700 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3701 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3702 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3703 if (pcbddc->benign_zerodiag_subs) { 3704 PetscInt i; 3705 for (i=0;i<pcbddc->benign_n;i++) { 3706 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3707 } 3708 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3709 } 3710 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3711 PetscFunctionReturn(0); 3712 } 3713 3714 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3715 { 3716 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3717 PC_IS *pcis = (PC_IS*)pc->data; 3718 VecType impVecType; 3719 PetscInt n_constraints,n_R,old_size; 3720 PetscErrorCode ierr; 3721 3722 PetscFunctionBegin; 3723 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3724 n_R = pcis->n - pcbddc->n_vertices; 3725 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3726 /* local work vectors (try to avoid unneeded work)*/ 3727 /* R nodes */ 3728 old_size = -1; 3729 if (pcbddc->vec1_R) { 3730 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3731 } 3732 if (n_R != old_size) { 3733 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3734 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3735 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3736 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3737 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3738 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3739 } 3740 /* local primal dofs */ 3741 old_size = -1; 3742 if (pcbddc->vec1_P) { 3743 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3744 } 3745 if (pcbddc->local_primal_size != old_size) { 3746 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3747 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3748 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3749 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3750 } 3751 /* local explicit constraints */ 3752 old_size = -1; 3753 if (pcbddc->vec1_C) { 3754 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3755 } 3756 if (n_constraints && n_constraints != old_size) { 3757 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3758 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3759 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3760 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3761 } 3762 PetscFunctionReturn(0); 3763 } 3764 3765 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3766 { 3767 PetscErrorCode ierr; 3768 /* pointers to pcis and pcbddc */ 3769 PC_IS* pcis = (PC_IS*)pc->data; 3770 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3771 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3772 /* submatrices of local problem */ 3773 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3774 /* submatrices of local coarse problem */ 3775 Mat S_VV,S_CV,S_VC,S_CC; 3776 /* working matrices */ 3777 Mat C_CR; 3778 /* additional working stuff */ 3779 PC pc_R; 3780 Mat F,Brhs = NULL; 3781 Vec dummy_vec; 3782 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3783 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3784 PetscScalar *work; 3785 PetscInt *idx_V_B; 3786 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3787 PetscInt i,n_R,n_D,n_B; 3788 3789 /* some shortcuts to scalars */ 3790 PetscScalar one=1.0,m_one=-1.0; 3791 3792 PetscFunctionBegin; 3793 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3794 ierr = PetscLogEventBegin(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 3795 3796 /* Set Non-overlapping dimensions */ 3797 n_vertices = pcbddc->n_vertices; 3798 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3799 n_B = pcis->n_B; 3800 n_D = pcis->n - n_B; 3801 n_R = pcis->n - n_vertices; 3802 3803 /* vertices in boundary numbering */ 3804 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3805 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3806 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",n_vertices,i); 3807 3808 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3809 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3810 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3811 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3812 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3813 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3814 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3815 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3817 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3818 3819 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3820 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3821 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3822 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3823 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3824 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3825 lda_rhs = n_R; 3826 need_benign_correction = PETSC_FALSE; 3827 if (isLU || isILU || isCHOL) { 3828 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3829 } else if (sub_schurs && sub_schurs->reuse_solver) { 3830 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3831 MatFactorType type; 3832 3833 F = reuse_solver->F; 3834 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3835 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3836 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3837 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3838 } else { 3839 F = NULL; 3840 } 3841 3842 /* determine if we can use a sparse right-hand side */ 3843 sparserhs = PETSC_FALSE; 3844 if (F) { 3845 MatSolverType solver; 3846 3847 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3848 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3849 } 3850 3851 /* allocate workspace */ 3852 n = 0; 3853 if (n_constraints) { 3854 n += lda_rhs*n_constraints; 3855 } 3856 if (n_vertices) { 3857 n = PetscMax(2*lda_rhs*n_vertices,n); 3858 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3859 } 3860 if (!pcbddc->symmetric_primal) { 3861 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3862 } 3863 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3864 3865 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3866 dummy_vec = NULL; 3867 if (need_benign_correction && lda_rhs != n_R && F) { 3868 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&dummy_vec);CHKERRQ(ierr); 3869 ierr = VecSetSizes(dummy_vec,lda_rhs,PETSC_DECIDE);CHKERRQ(ierr); 3870 ierr = VecSetType(dummy_vec,((PetscObject)pcis->vec1_N)->type_name);CHKERRQ(ierr); 3871 } 3872 3873 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3874 if (n_constraints) { 3875 Mat M3,C_B; 3876 IS is_aux; 3877 PetscScalar *array,*array2; 3878 3879 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3880 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3881 3882 /* Extract constraints on R nodes: C_{CR} */ 3883 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3884 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3885 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3886 3887 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3888 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3889 if (!sparserhs) { 3890 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3891 for (i=0;i<n_constraints;i++) { 3892 const PetscScalar *row_cmat_values; 3893 const PetscInt *row_cmat_indices; 3894 PetscInt size_of_constraint,j; 3895 3896 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3897 for (j=0;j<size_of_constraint;j++) { 3898 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3899 } 3900 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3901 } 3902 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3903 } else { 3904 Mat tC_CR; 3905 3906 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3907 if (lda_rhs != n_R) { 3908 PetscScalar *aa; 3909 PetscInt r,*ii,*jj; 3910 PetscBool done; 3911 3912 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3913 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3914 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3915 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3916 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3917 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3918 } else { 3919 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3920 tC_CR = C_CR; 3921 } 3922 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3923 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3924 } 3925 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3926 if (F) { 3927 if (need_benign_correction) { 3928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3929 3930 /* rhs is already zero on interior dofs, no need to change the rhs */ 3931 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3932 } 3933 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3934 if (need_benign_correction) { 3935 PetscScalar *marr; 3936 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3937 3938 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3939 if (lda_rhs != n_R) { 3940 for (i=0;i<n_constraints;i++) { 3941 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3942 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3943 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3944 } 3945 } else { 3946 for (i=0;i<n_constraints;i++) { 3947 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3948 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3949 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3950 } 3951 } 3952 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3953 } 3954 } else { 3955 PetscScalar *marr; 3956 3957 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3958 for (i=0;i<n_constraints;i++) { 3959 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3960 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3961 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3962 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3963 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3964 } 3965 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3966 } 3967 if (sparserhs) { 3968 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3969 } 3970 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3971 if (!pcbddc->switch_static) { 3972 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3973 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3974 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3975 for (i=0;i<n_constraints;i++) { 3976 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3977 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3978 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3979 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3980 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3981 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3982 } 3983 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3984 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3985 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3986 } else { 3987 if (lda_rhs != n_R) { 3988 IS dummy; 3989 3990 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3991 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3992 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3993 } else { 3994 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3995 pcbddc->local_auxmat2 = local_auxmat2_R; 3996 } 3997 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3998 } 3999 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4000 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4001 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4002 if (isCHOL) { 4003 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4004 } else { 4005 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4006 } 4007 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4008 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4009 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4010 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4011 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4012 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4013 } 4014 4015 /* Get submatrices from subdomain matrix */ 4016 if (n_vertices) { 4017 IS is_aux; 4018 PetscBool isseqaij; 4019 4020 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4021 IS tis; 4022 4023 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4024 ierr = ISSort(tis);CHKERRQ(ierr); 4025 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4026 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4027 } else { 4028 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4029 } 4030 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4031 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4032 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4033 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4034 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4035 } 4036 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4037 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4038 } 4039 4040 /* Matrix of coarse basis functions (local) */ 4041 if (pcbddc->coarse_phi_B) { 4042 PetscInt on_B,on_primal,on_D=n_D; 4043 if (pcbddc->coarse_phi_D) { 4044 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4045 } 4046 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4047 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4048 PetscScalar *marray; 4049 4050 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4051 ierr = PetscFree(marray);CHKERRQ(ierr); 4052 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4053 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4054 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4055 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4056 } 4057 } 4058 4059 if (!pcbddc->coarse_phi_B) { 4060 PetscScalar *marr; 4061 4062 /* memory size */ 4063 n = n_B*pcbddc->local_primal_size; 4064 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4065 if (!pcbddc->symmetric_primal) n *= 2; 4066 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4067 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4068 marr += n_B*pcbddc->local_primal_size; 4069 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4070 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4071 marr += n_D*pcbddc->local_primal_size; 4072 } 4073 if (!pcbddc->symmetric_primal) { 4074 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4075 marr += n_B*pcbddc->local_primal_size; 4076 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4077 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4078 } 4079 } else { 4080 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4081 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4082 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4083 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4084 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4085 } 4086 } 4087 } 4088 4089 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4090 p0_lidx_I = NULL; 4091 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4092 const PetscInt *idxs; 4093 4094 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4095 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4096 for (i=0;i<pcbddc->benign_n;i++) { 4097 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4098 } 4099 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4100 } 4101 4102 /* vertices */ 4103 if (n_vertices) { 4104 PetscBool restoreavr = PETSC_FALSE; 4105 4106 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4107 4108 if (n_R) { 4109 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4110 PetscBLASInt B_N,B_one = 1; 4111 PetscScalar *x,*y; 4112 4113 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4114 if (need_benign_correction) { 4115 ISLocalToGlobalMapping RtoN; 4116 IS is_p0; 4117 PetscInt *idxs_p0,n; 4118 4119 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4120 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4121 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4122 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %D != %D",n,pcbddc->benign_n); 4123 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4124 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4125 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4126 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4127 } 4128 4129 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4130 if (!sparserhs || need_benign_correction) { 4131 if (lda_rhs == n_R) { 4132 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4133 } else { 4134 PetscScalar *av,*array; 4135 const PetscInt *xadj,*adjncy; 4136 PetscInt n; 4137 PetscBool flg_row; 4138 4139 array = work+lda_rhs*n_vertices; 4140 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4141 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4142 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4143 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4144 for (i=0;i<n;i++) { 4145 PetscInt j; 4146 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4147 } 4148 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4149 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4150 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4151 } 4152 if (need_benign_correction) { 4153 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4154 PetscScalar *marr; 4155 4156 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4157 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4158 4159 | 0 0 0 | (V) 4160 L = | 0 0 -1 | (P-p0) 4161 | 0 0 -1 | (p0) 4162 4163 */ 4164 for (i=0;i<reuse_solver->benign_n;i++) { 4165 const PetscScalar *vals; 4166 const PetscInt *idxs,*idxs_zero; 4167 PetscInt n,j,nz; 4168 4169 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4170 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4171 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4172 for (j=0;j<n;j++) { 4173 PetscScalar val = vals[j]; 4174 PetscInt k,col = idxs[j]; 4175 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4176 } 4177 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4178 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4179 } 4180 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4181 } 4182 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4183 Brhs = A_RV; 4184 } else { 4185 Mat tA_RVT,A_RVT; 4186 4187 if (!pcbddc->symmetric_primal) { 4188 /* A_RV already scaled by -1 */ 4189 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4190 } else { 4191 restoreavr = PETSC_TRUE; 4192 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4193 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4194 A_RVT = A_VR; 4195 } 4196 if (lda_rhs != n_R) { 4197 PetscScalar *aa; 4198 PetscInt r,*ii,*jj; 4199 PetscBool done; 4200 4201 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4202 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4203 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4204 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4205 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4206 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4207 } else { 4208 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4209 tA_RVT = A_RVT; 4210 } 4211 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4212 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4213 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4214 } 4215 if (F) { 4216 /* need to correct the rhs */ 4217 if (need_benign_correction) { 4218 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4219 PetscScalar *marr; 4220 4221 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4222 if (lda_rhs != n_R) { 4223 for (i=0;i<n_vertices;i++) { 4224 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4225 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4226 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4227 } 4228 } else { 4229 for (i=0;i<n_vertices;i++) { 4230 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4231 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4232 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4233 } 4234 } 4235 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4236 } 4237 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4238 if (restoreavr) { 4239 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4240 } 4241 /* need to correct the solution */ 4242 if (need_benign_correction) { 4243 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4244 PetscScalar *marr; 4245 4246 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4247 if (lda_rhs != n_R) { 4248 for (i=0;i<n_vertices;i++) { 4249 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4250 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4251 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4252 } 4253 } else { 4254 for (i=0;i<n_vertices;i++) { 4255 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4256 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4257 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4258 } 4259 } 4260 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4261 } 4262 } else { 4263 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4264 for (i=0;i<n_vertices;i++) { 4265 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4266 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4267 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4268 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4269 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4270 } 4271 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4272 } 4273 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4274 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4275 /* S_VV and S_CV */ 4276 if (n_constraints) { 4277 Mat B; 4278 4279 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4280 for (i=0;i<n_vertices;i++) { 4281 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4282 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4283 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4284 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4285 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4286 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4287 } 4288 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4289 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4290 ierr = MatDestroy(&B);CHKERRQ(ierr); 4291 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4292 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4293 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4294 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4295 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4296 ierr = MatDestroy(&B);CHKERRQ(ierr); 4297 } 4298 if (lda_rhs != n_R) { 4299 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4300 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4301 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4302 } 4303 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4304 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4305 if (need_benign_correction) { 4306 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4307 PetscScalar *marr,*sums; 4308 4309 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4310 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4311 for (i=0;i<reuse_solver->benign_n;i++) { 4312 const PetscScalar *vals; 4313 const PetscInt *idxs,*idxs_zero; 4314 PetscInt n,j,nz; 4315 4316 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4317 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4318 for (j=0;j<n_vertices;j++) { 4319 PetscInt k; 4320 sums[j] = 0.; 4321 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4322 } 4323 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4324 for (j=0;j<n;j++) { 4325 PetscScalar val = vals[j]; 4326 PetscInt k; 4327 for (k=0;k<n_vertices;k++) { 4328 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4329 } 4330 } 4331 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4332 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4333 } 4334 ierr = PetscFree(sums);CHKERRQ(ierr); 4335 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4336 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4337 } 4338 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4339 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4340 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4341 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4342 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4343 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4344 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4345 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4346 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4347 } else { 4348 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4349 } 4350 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4351 4352 /* coarse basis functions */ 4353 for (i=0;i<n_vertices;i++) { 4354 PetscScalar *y; 4355 4356 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4357 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4358 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4359 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4360 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4361 y[n_B*i+idx_V_B[i]] = 1.0; 4362 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4363 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4364 4365 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4366 PetscInt j; 4367 4368 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4369 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4370 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4371 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4372 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4373 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4374 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4375 } 4376 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4377 } 4378 /* if n_R == 0 the object is not destroyed */ 4379 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4380 } 4381 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4382 4383 if (n_constraints) { 4384 Mat B; 4385 4386 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4387 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4388 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4389 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4390 if (n_vertices) { 4391 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4392 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4393 } else { 4394 Mat S_VCt; 4395 4396 if (lda_rhs != n_R) { 4397 ierr = MatDestroy(&B);CHKERRQ(ierr); 4398 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4399 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4400 } 4401 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4402 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4403 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4404 } 4405 } 4406 ierr = MatDestroy(&B);CHKERRQ(ierr); 4407 /* coarse basis functions */ 4408 for (i=0;i<n_constraints;i++) { 4409 PetscScalar *y; 4410 4411 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4412 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4413 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4414 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4415 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4416 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4417 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4418 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4419 PetscInt j; 4420 4421 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4422 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4423 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4424 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4425 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4426 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4427 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4428 } 4429 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4430 } 4431 } 4432 if (n_constraints) { 4433 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4434 } 4435 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4436 4437 /* coarse matrix entries relative to B_0 */ 4438 if (pcbddc->benign_n) { 4439 Mat B0_B,B0_BPHI; 4440 IS is_dummy; 4441 PetscScalar *data; 4442 PetscInt j; 4443 4444 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4445 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4446 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4447 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4448 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4449 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4450 for (j=0;j<pcbddc->benign_n;j++) { 4451 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4452 for (i=0;i<pcbddc->local_primal_size;i++) { 4453 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4454 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4455 } 4456 } 4457 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4458 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4459 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4460 } 4461 4462 /* compute other basis functions for non-symmetric problems */ 4463 if (!pcbddc->symmetric_primal) { 4464 Mat B_V=NULL,B_C=NULL; 4465 PetscScalar *marray; 4466 4467 if (n_constraints) { 4468 Mat S_CCT,C_CRT; 4469 4470 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4471 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4472 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4473 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4474 if (n_vertices) { 4475 Mat S_VCT; 4476 4477 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4478 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4479 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4480 } 4481 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4482 } else { 4483 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4484 } 4485 if (n_vertices && n_R) { 4486 PetscScalar *av,*marray; 4487 const PetscInt *xadj,*adjncy; 4488 PetscInt n; 4489 PetscBool flg_row; 4490 4491 /* B_V = B_V - A_VR^T */ 4492 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4493 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4494 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4495 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4496 for (i=0;i<n;i++) { 4497 PetscInt j; 4498 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4499 } 4500 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4501 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4502 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4503 } 4504 4505 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4506 if (n_vertices) { 4507 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4508 for (i=0;i<n_vertices;i++) { 4509 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4510 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4511 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4512 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4513 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4514 } 4515 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4516 } 4517 if (B_C) { 4518 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4519 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4520 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4521 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4522 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4523 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4524 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4525 } 4526 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4527 } 4528 /* coarse basis functions */ 4529 for (i=0;i<pcbddc->local_primal_size;i++) { 4530 PetscScalar *y; 4531 4532 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4533 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4534 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4535 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4536 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4537 if (i<n_vertices) { 4538 y[n_B*i+idx_V_B[i]] = 1.0; 4539 } 4540 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4541 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4542 4543 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4544 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4545 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4546 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4547 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4548 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4549 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4550 } 4551 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4552 } 4553 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4554 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4555 } 4556 4557 /* free memory */ 4558 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4559 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4560 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4561 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4562 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4563 ierr = PetscFree(work);CHKERRQ(ierr); 4564 if (n_vertices) { 4565 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4566 } 4567 if (n_constraints) { 4568 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4569 } 4570 /* Checking coarse_sub_mat and coarse basis functios */ 4571 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4572 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4573 if (pcbddc->dbg_flag) { 4574 Mat coarse_sub_mat; 4575 Mat AUXMAT,TM1,TM2,TM3,TM4; 4576 Mat coarse_phi_D,coarse_phi_B; 4577 Mat coarse_psi_D,coarse_psi_B; 4578 Mat A_II,A_BB,A_IB,A_BI; 4579 Mat C_B,CPHI; 4580 IS is_dummy; 4581 Vec mones; 4582 MatType checkmattype=MATSEQAIJ; 4583 PetscReal real_value; 4584 4585 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4586 Mat A; 4587 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4588 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4589 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4590 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4591 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4592 ierr = MatDestroy(&A);CHKERRQ(ierr); 4593 } else { 4594 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4595 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4596 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4597 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4598 } 4599 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4600 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4601 if (!pcbddc->symmetric_primal) { 4602 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4603 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4604 } 4605 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4606 4607 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4608 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4609 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4610 if (!pcbddc->symmetric_primal) { 4611 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4612 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4613 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4614 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4615 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4616 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4617 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4618 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4619 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4620 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4621 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4622 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4623 } else { 4624 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4625 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4626 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4627 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4628 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4629 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4630 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4631 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4632 } 4633 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4634 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4635 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4636 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4637 if (pcbddc->benign_n) { 4638 Mat B0_B,B0_BPHI; 4639 PetscScalar *data,*data2; 4640 PetscInt j; 4641 4642 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4643 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4644 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4645 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4646 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4647 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4648 for (j=0;j<pcbddc->benign_n;j++) { 4649 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4650 for (i=0;i<pcbddc->local_primal_size;i++) { 4651 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4652 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4653 } 4654 } 4655 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4656 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4657 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4658 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4659 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4660 } 4661 #if 0 4662 { 4663 PetscViewer viewer; 4664 char filename[256]; 4665 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4666 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4667 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4668 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4669 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4670 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4671 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4672 if (pcbddc->coarse_phi_B) { 4673 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4674 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4675 } 4676 if (pcbddc->coarse_phi_D) { 4677 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4678 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4679 } 4680 if (pcbddc->coarse_psi_B) { 4681 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4682 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4683 } 4684 if (pcbddc->coarse_psi_D) { 4685 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4686 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4687 } 4688 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4689 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4690 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4691 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4692 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4693 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4694 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4695 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4696 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4697 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4698 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4699 } 4700 #endif 4701 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4702 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4703 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4704 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4705 4706 /* check constraints */ 4707 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4708 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4709 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4710 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4711 } else { 4712 PetscScalar *data; 4713 Mat tmat; 4714 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4715 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4716 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4717 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4718 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4719 } 4720 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4721 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4722 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4723 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4724 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4725 if (!pcbddc->symmetric_primal) { 4726 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4727 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4728 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4729 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4730 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4731 } 4732 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4733 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4734 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4735 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4736 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4737 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4738 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4739 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4740 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4741 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4742 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4743 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4744 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4745 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4746 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4747 if (!pcbddc->symmetric_primal) { 4748 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4749 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4750 } 4751 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4752 } 4753 /* get back data */ 4754 *coarse_submat_vals_n = coarse_submat_vals; 4755 ierr = PetscLogEventEnd(PC_BDDC_CorrectionSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 4756 PetscFunctionReturn(0); 4757 } 4758 4759 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4760 { 4761 Mat *work_mat; 4762 IS isrow_s,iscol_s; 4763 PetscBool rsorted,csorted; 4764 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4765 PetscErrorCode ierr; 4766 4767 PetscFunctionBegin; 4768 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4769 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4770 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4771 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4772 4773 if (!rsorted) { 4774 const PetscInt *idxs; 4775 PetscInt *idxs_sorted,i; 4776 4777 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4778 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4779 for (i=0;i<rsize;i++) { 4780 idxs_perm_r[i] = i; 4781 } 4782 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4783 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4784 for (i=0;i<rsize;i++) { 4785 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4786 } 4787 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4788 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4789 } else { 4790 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4791 isrow_s = isrow; 4792 } 4793 4794 if (!csorted) { 4795 if (isrow == iscol) { 4796 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4797 iscol_s = isrow_s; 4798 } else { 4799 const PetscInt *idxs; 4800 PetscInt *idxs_sorted,i; 4801 4802 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4803 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4804 for (i=0;i<csize;i++) { 4805 idxs_perm_c[i] = i; 4806 } 4807 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4808 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4809 for (i=0;i<csize;i++) { 4810 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4811 } 4812 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4813 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4814 } 4815 } else { 4816 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4817 iscol_s = iscol; 4818 } 4819 4820 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4821 4822 if (!rsorted || !csorted) { 4823 Mat new_mat; 4824 IS is_perm_r,is_perm_c; 4825 4826 if (!rsorted) { 4827 PetscInt *idxs_r,i; 4828 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4829 for (i=0;i<rsize;i++) { 4830 idxs_r[idxs_perm_r[i]] = i; 4831 } 4832 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4833 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4834 } else { 4835 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4836 } 4837 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4838 4839 if (!csorted) { 4840 if (isrow_s == iscol_s) { 4841 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4842 is_perm_c = is_perm_r; 4843 } else { 4844 PetscInt *idxs_c,i; 4845 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4846 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4847 for (i=0;i<csize;i++) { 4848 idxs_c[idxs_perm_c[i]] = i; 4849 } 4850 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4851 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4852 } 4853 } else { 4854 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4855 } 4856 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4857 4858 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4859 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4860 work_mat[0] = new_mat; 4861 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4862 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4863 } 4864 4865 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4866 *B = work_mat[0]; 4867 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4868 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4869 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4870 PetscFunctionReturn(0); 4871 } 4872 4873 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4874 { 4875 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4877 Mat new_mat,lA; 4878 IS is_local,is_global; 4879 PetscInt local_size; 4880 PetscBool isseqaij; 4881 PetscErrorCode ierr; 4882 4883 PetscFunctionBegin; 4884 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4885 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4886 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4887 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4888 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4889 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4890 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4891 4892 /* check */ 4893 if (pcbddc->dbg_flag) { 4894 Vec x,x_change; 4895 PetscReal error; 4896 4897 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4898 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4899 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4900 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4901 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4902 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4903 if (!pcbddc->change_interior) { 4904 const PetscScalar *x,*y,*v; 4905 PetscReal lerror = 0.; 4906 PetscInt i; 4907 4908 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4909 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4910 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4911 for (i=0;i<local_size;i++) 4912 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4913 lerror = PetscAbsScalar(x[i]-y[i]); 4914 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4915 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4916 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4917 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4918 if (error > PETSC_SMALL) { 4919 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4920 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e",error); 4921 } else { 4922 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e",error); 4923 } 4924 } 4925 } 4926 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4927 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4928 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4929 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4930 if (error > PETSC_SMALL) { 4931 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4932 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 4933 } else { 4934 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e",error); 4935 } 4936 } 4937 ierr = VecDestroy(&x);CHKERRQ(ierr); 4938 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4939 } 4940 4941 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4942 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4943 4944 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4945 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4946 if (isseqaij) { 4947 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4948 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4949 if (lA) { 4950 Mat work; 4951 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4952 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4953 ierr = MatDestroy(&work);CHKERRQ(ierr); 4954 } 4955 } else { 4956 Mat work_mat; 4957 4958 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4959 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4960 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4961 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4962 if (lA) { 4963 Mat work; 4964 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4965 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4966 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4967 ierr = MatDestroy(&work);CHKERRQ(ierr); 4968 } 4969 } 4970 if (matis->A->symmetric_set) { 4971 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4972 #if !defined(PETSC_USE_COMPLEX) 4973 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4974 #endif 4975 } 4976 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4977 PetscFunctionReturn(0); 4978 } 4979 4980 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4981 { 4982 PC_IS* pcis = (PC_IS*)(pc->data); 4983 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4984 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4985 PetscInt *idx_R_local=NULL; 4986 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4987 PetscInt vbs,bs; 4988 PetscBT bitmask=NULL; 4989 PetscErrorCode ierr; 4990 4991 PetscFunctionBegin; 4992 /* 4993 No need to setup local scatters if 4994 - primal space is unchanged 4995 AND 4996 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4997 AND 4998 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4999 */ 5000 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5001 PetscFunctionReturn(0); 5002 } 5003 /* destroy old objects */ 5004 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5005 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5006 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5007 /* Set Non-overlapping dimensions */ 5008 n_B = pcis->n_B; 5009 n_D = pcis->n - n_B; 5010 n_vertices = pcbddc->n_vertices; 5011 5012 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5013 5014 /* create auxiliary bitmask and allocate workspace */ 5015 if (!sub_schurs || !sub_schurs->reuse_solver) { 5016 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5017 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5018 for (i=0;i<n_vertices;i++) { 5019 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5020 } 5021 5022 for (i=0, n_R=0; i<pcis->n; i++) { 5023 if (!PetscBTLookup(bitmask,i)) { 5024 idx_R_local[n_R++] = i; 5025 } 5026 } 5027 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5028 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5029 5030 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5031 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5032 } 5033 5034 /* Block code */ 5035 vbs = 1; 5036 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5037 if (bs>1 && !(n_vertices%bs)) { 5038 PetscBool is_blocked = PETSC_TRUE; 5039 PetscInt *vary; 5040 if (!sub_schurs || !sub_schurs->reuse_solver) { 5041 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5042 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5043 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5044 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5045 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5046 for (i=0; i<pcis->n/bs; i++) { 5047 if (vary[i]!=0 && vary[i]!=bs) { 5048 is_blocked = PETSC_FALSE; 5049 break; 5050 } 5051 } 5052 ierr = PetscFree(vary);CHKERRQ(ierr); 5053 } else { 5054 /* Verify directly the R set */ 5055 for (i=0; i<n_R/bs; i++) { 5056 PetscInt j,node=idx_R_local[bs*i]; 5057 for (j=1; j<bs; j++) { 5058 if (node != idx_R_local[bs*i+j]-j) { 5059 is_blocked = PETSC_FALSE; 5060 break; 5061 } 5062 } 5063 } 5064 } 5065 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5066 vbs = bs; 5067 for (i=0;i<n_R/vbs;i++) { 5068 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5069 } 5070 } 5071 } 5072 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5073 if (sub_schurs && sub_schurs->reuse_solver) { 5074 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5075 5076 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5077 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5078 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5079 reuse_solver->is_R = pcbddc->is_R_local; 5080 } else { 5081 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5082 } 5083 5084 /* print some info if requested */ 5085 if (pcbddc->dbg_flag) { 5086 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5087 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5088 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5089 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5090 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %D, dirichlet_size = %D, boundary_size = %D\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5091 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %D, v_size = %D, constraints = %D, local_primal_size = %D\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5092 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5093 } 5094 5095 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5096 if (!sub_schurs || !sub_schurs->reuse_solver) { 5097 IS is_aux1,is_aux2; 5098 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5099 5100 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5101 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5102 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5103 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5104 for (i=0; i<n_D; i++) { 5105 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5106 } 5107 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5108 for (i=0, j=0; i<n_R; i++) { 5109 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5110 aux_array1[j++] = i; 5111 } 5112 } 5113 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5114 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5115 for (i=0, j=0; i<n_B; i++) { 5116 if (!PetscBTLookup(bitmask,is_indices[i])) { 5117 aux_array2[j++] = i; 5118 } 5119 } 5120 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5121 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5122 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5123 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5124 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5125 5126 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5127 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5128 for (i=0, j=0; i<n_R; i++) { 5129 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5130 aux_array1[j++] = i; 5131 } 5132 } 5133 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5134 ierr = VecScatterCreateWithData(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5135 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5136 } 5137 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5138 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5139 } else { 5140 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5141 IS tis; 5142 PetscInt schur_size; 5143 5144 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5145 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5146 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5147 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5148 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5149 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5150 ierr = VecScatterCreateWithData(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5151 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5152 } 5153 } 5154 PetscFunctionReturn(0); 5155 } 5156 5157 5158 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5159 { 5160 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5161 PC_IS *pcis = (PC_IS*)pc->data; 5162 PC pc_temp; 5163 Mat A_RR; 5164 MatReuse reuse; 5165 PetscScalar m_one = -1.0; 5166 PetscReal value; 5167 PetscInt n_D,n_R; 5168 PetscBool check_corr,issbaij; 5169 PetscErrorCode ierr; 5170 /* prefixes stuff */ 5171 char dir_prefix[256],neu_prefix[256],str_level[16]; 5172 size_t len; 5173 5174 PetscFunctionBegin; 5175 ierr = PetscLogEventBegin(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5176 /* compute prefixes */ 5177 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5178 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5179 if (!pcbddc->current_level) { 5180 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,sizeof(dir_prefix));CHKERRQ(ierr); 5181 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,sizeof(neu_prefix));CHKERRQ(ierr); 5182 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5183 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5184 } else { 5185 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5186 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5187 len -= 15; /* remove "pc_bddc_coarse_" */ 5188 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5189 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5190 /* Nonstandard use of PetscStrncpy() to only copy a portion of the input string */ 5191 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5192 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5193 ierr = PetscStrlcat(dir_prefix,"pc_bddc_dirichlet_",sizeof(dir_prefix));CHKERRQ(ierr); 5194 ierr = PetscStrlcat(neu_prefix,"pc_bddc_neumann_",sizeof(neu_prefix));CHKERRQ(ierr); 5195 ierr = PetscStrlcat(dir_prefix,str_level,sizeof(dir_prefix));CHKERRQ(ierr); 5196 ierr = PetscStrlcat(neu_prefix,str_level,sizeof(neu_prefix));CHKERRQ(ierr); 5197 } 5198 5199 /* DIRICHLET PROBLEM */ 5200 if (dirichlet) { 5201 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5202 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5203 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 5204 if (pcbddc->dbg_flag) { 5205 Mat A_IIn; 5206 5207 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5208 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5209 pcis->A_II = A_IIn; 5210 } 5211 } 5212 if (pcbddc->local_mat->symmetric_set) { 5213 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5214 } 5215 /* Matrix for Dirichlet problem is pcis->A_II */ 5216 n_D = pcis->n - pcis->n_B; 5217 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5218 void (*f)(void) = 0; 5219 5220 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5221 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5222 /* default */ 5223 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5224 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5225 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5226 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5227 if (issbaij) { 5228 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5229 } else { 5230 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5231 } 5232 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_D,pc->erroriffailure);CHKERRQ(ierr); 5233 /* Allow user's customization */ 5234 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5235 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5236 if (f && pcbddc->mat_graph->cloc) { 5237 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5238 const PetscInt *idxs; 5239 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5240 5241 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5242 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5243 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5244 for (i=0;i<nl;i++) { 5245 for (d=0;d<cdim;d++) { 5246 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5247 } 5248 } 5249 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5250 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5251 ierr = PetscFree(scoords);CHKERRQ(ierr); 5252 } 5253 } 5254 ierr = MatSetOptionsPrefix(pcis->A_II,((PetscObject)pcbddc->ksp_D)->prefix);CHKERRQ(ierr); 5255 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5256 if (sub_schurs && sub_schurs->reuse_solver) { 5257 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5258 5259 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5260 } 5261 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5262 if (!n_D) { 5263 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5264 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5265 } 5266 /* set ksp_D into pcis data */ 5267 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5268 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5269 pcis->ksp_D = pcbddc->ksp_D; 5270 } 5271 5272 /* NEUMANN PROBLEM */ 5273 A_RR = 0; 5274 if (neumann) { 5275 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5276 PetscInt ibs,mbs; 5277 PetscBool issbaij, reuse_neumann_solver; 5278 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5279 5280 reuse_neumann_solver = PETSC_FALSE; 5281 if (sub_schurs && sub_schurs->reuse_solver) { 5282 IS iP; 5283 5284 reuse_neumann_solver = PETSC_TRUE; 5285 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5286 if (iP) reuse_neumann_solver = PETSC_FALSE; 5287 } 5288 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5289 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5290 if (pcbddc->ksp_R) { /* already created ksp */ 5291 PetscInt nn_R; 5292 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5293 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5294 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5295 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5296 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5297 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5298 reuse = MAT_INITIAL_MATRIX; 5299 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5300 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5301 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5302 reuse = MAT_INITIAL_MATRIX; 5303 } else { /* safe to reuse the matrix */ 5304 reuse = MAT_REUSE_MATRIX; 5305 } 5306 } 5307 /* last check */ 5308 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5309 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5310 reuse = MAT_INITIAL_MATRIX; 5311 } 5312 } else { /* first time, so we need to create the matrix */ 5313 reuse = MAT_INITIAL_MATRIX; 5314 } 5315 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5316 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5317 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5318 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5319 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5320 if (matis->A == pcbddc->local_mat) { 5321 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5322 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5323 } else { 5324 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5325 } 5326 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5327 if (matis->A == pcbddc->local_mat) { 5328 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5329 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5330 } else { 5331 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5332 } 5333 } 5334 /* extract A_RR */ 5335 if (reuse_neumann_solver) { 5336 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5337 5338 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5339 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5340 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5341 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5342 } else { 5343 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5344 } 5345 } else { 5346 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5347 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5348 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5349 } 5350 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5351 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5352 } 5353 if (pcbddc->local_mat->symmetric_set) { 5354 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5355 } 5356 if (!pcbddc->ksp_R) { /* create object if not present */ 5357 void (*f)(void) = 0; 5358 5359 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5360 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5361 /* default */ 5362 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5363 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5364 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5365 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5366 if (issbaij) { 5367 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5368 } else { 5369 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5370 } 5371 ierr = KSPSetErrorIfNotConverged(pcbddc->ksp_R,pc->erroriffailure);CHKERRQ(ierr); 5372 /* Allow user's customization */ 5373 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5374 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5375 if (f && pcbddc->mat_graph->cloc) { 5376 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5377 const PetscInt *idxs; 5378 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5379 5380 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5381 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5382 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5383 for (i=0;i<nl;i++) { 5384 for (d=0;d<cdim;d++) { 5385 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5386 } 5387 } 5388 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5389 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5390 ierr = PetscFree(scoords);CHKERRQ(ierr); 5391 } 5392 } 5393 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5394 if (!n_R) { 5395 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5396 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5397 } 5398 ierr = MatSetOptionsPrefix(A_RR,((PetscObject)pcbddc->ksp_R)->prefix);CHKERRQ(ierr); 5399 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5400 /* Reuse solver if it is present */ 5401 if (reuse_neumann_solver) { 5402 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5403 5404 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5405 } 5406 } 5407 5408 if (pcbddc->dbg_flag) { 5409 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5410 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5412 } 5413 5414 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5415 check_corr = PETSC_FALSE; 5416 if (pcbddc->NullSpace_corr[0]) { 5417 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5418 } 5419 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5420 check_corr = PETSC_TRUE; 5421 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5422 } 5423 if (neumann && pcbddc->NullSpace_corr[2]) { 5424 check_corr = PETSC_TRUE; 5425 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5426 } 5427 /* check Dirichlet and Neumann solvers */ 5428 if (pcbddc->dbg_flag) { 5429 if (dirichlet) { /* Dirichlet */ 5430 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5431 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5432 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5433 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5434 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5435 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5436 if (check_corr) { 5437 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5438 } 5439 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5440 } 5441 if (neumann) { /* Neumann */ 5442 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5443 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5444 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5445 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5446 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5447 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5448 if (check_corr) { 5449 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5450 } 5451 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5452 } 5453 } 5454 /* free Neumann problem's matrix */ 5455 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5456 ierr = PetscLogEventEnd(PC_BDDC_LocalSolvers[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 5457 PetscFunctionReturn(0); 5458 } 5459 5460 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5461 { 5462 PetscErrorCode ierr; 5463 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5464 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5465 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5466 5467 PetscFunctionBegin; 5468 if (!reuse_solver) { 5469 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5470 } 5471 if (!pcbddc->switch_static) { 5472 if (applytranspose && pcbddc->local_auxmat1) { 5473 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5474 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5475 } 5476 if (!reuse_solver) { 5477 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5478 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5479 } else { 5480 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5481 5482 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5483 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5484 } 5485 } else { 5486 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5487 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5488 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5489 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5490 if (applytranspose && pcbddc->local_auxmat1) { 5491 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5492 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5493 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5494 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5495 } 5496 } 5497 if (!reuse_solver || pcbddc->switch_static) { 5498 if (applytranspose) { 5499 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5500 } else { 5501 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5502 } 5503 } else { 5504 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5505 5506 if (applytranspose) { 5507 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5508 } else { 5509 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5510 } 5511 } 5512 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5513 if (!pcbddc->switch_static) { 5514 if (!reuse_solver) { 5515 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5516 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5517 } else { 5518 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5519 5520 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5521 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5522 } 5523 if (!applytranspose && pcbddc->local_auxmat1) { 5524 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5525 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5526 } 5527 } else { 5528 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5529 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5530 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5531 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5532 if (!applytranspose && pcbddc->local_auxmat1) { 5533 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5534 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5535 } 5536 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5537 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5538 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5539 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5540 } 5541 PetscFunctionReturn(0); 5542 } 5543 5544 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5545 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5546 { 5547 PetscErrorCode ierr; 5548 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5549 PC_IS* pcis = (PC_IS*) (pc->data); 5550 const PetscScalar zero = 0.0; 5551 5552 PetscFunctionBegin; 5553 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5554 if (!pcbddc->benign_apply_coarse_only) { 5555 if (applytranspose) { 5556 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5557 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5558 } else { 5559 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5560 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5561 } 5562 } else { 5563 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5564 } 5565 5566 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5567 if (pcbddc->benign_n) { 5568 PetscScalar *array; 5569 PetscInt j; 5570 5571 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5572 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5573 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5574 } 5575 5576 /* start communications from local primal nodes to rhs of coarse solver */ 5577 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5578 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5579 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5580 5581 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5582 if (pcbddc->coarse_ksp) { 5583 Mat coarse_mat; 5584 Vec rhs,sol; 5585 MatNullSpace nullsp; 5586 PetscBool isbddc = PETSC_FALSE; 5587 5588 if (pcbddc->benign_have_null) { 5589 PC coarse_pc; 5590 5591 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5592 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5593 /* we need to propagate to coarser levels the need for a possible benign correction */ 5594 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5595 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5596 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5597 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5598 } 5599 } 5600 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5601 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5602 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5603 if (applytranspose) { 5604 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5605 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5606 ierr = MatGetTransposeNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5607 if (nullsp) { 5608 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5609 } 5610 } else { 5611 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5612 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5613 PC coarse_pc; 5614 5615 if (nullsp) { 5616 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5617 } 5618 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5619 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5620 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5621 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5622 } else { 5623 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5624 if (nullsp) { 5625 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5626 } 5627 } 5628 } 5629 /* we don't need the benign correction at coarser levels anymore */ 5630 if (pcbddc->benign_have_null && isbddc) { 5631 PC coarse_pc; 5632 PC_BDDC* coarsepcbddc; 5633 5634 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5635 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5636 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5637 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5638 } 5639 } 5640 5641 /* Local solution on R nodes */ 5642 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5643 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5644 } 5645 /* communications from coarse sol to local primal nodes */ 5646 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5647 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5648 5649 /* Sum contributions from the two levels */ 5650 if (!pcbddc->benign_apply_coarse_only) { 5651 if (applytranspose) { 5652 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5653 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5654 } else { 5655 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5656 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5657 } 5658 /* store p0 */ 5659 if (pcbddc->benign_n) { 5660 PetscScalar *array; 5661 PetscInt j; 5662 5663 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5664 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5665 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5666 } 5667 } else { /* expand the coarse solution */ 5668 if (applytranspose) { 5669 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5670 } else { 5671 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5672 } 5673 } 5674 PetscFunctionReturn(0); 5675 } 5676 5677 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5678 { 5679 PetscErrorCode ierr; 5680 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5681 PetscScalar *array; 5682 Vec from,to; 5683 5684 PetscFunctionBegin; 5685 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5686 from = pcbddc->coarse_vec; 5687 to = pcbddc->vec1_P; 5688 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5689 Vec tvec; 5690 5691 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5692 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5693 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5694 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5695 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5696 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5697 } 5698 } else { /* from local to global -> put data in coarse right hand side */ 5699 from = pcbddc->vec1_P; 5700 to = pcbddc->coarse_vec; 5701 } 5702 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5703 PetscFunctionReturn(0); 5704 } 5705 5706 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5707 { 5708 PetscErrorCode ierr; 5709 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5710 PetscScalar *array; 5711 Vec from,to; 5712 5713 PetscFunctionBegin; 5714 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5715 from = pcbddc->coarse_vec; 5716 to = pcbddc->vec1_P; 5717 } else { /* from local to global -> put data in coarse right hand side */ 5718 from = pcbddc->vec1_P; 5719 to = pcbddc->coarse_vec; 5720 } 5721 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5722 if (smode == SCATTER_FORWARD) { 5723 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5724 Vec tvec; 5725 5726 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5727 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5728 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5729 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5730 } 5731 } else { 5732 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5733 ierr = VecResetArray(from);CHKERRQ(ierr); 5734 } 5735 } 5736 PetscFunctionReturn(0); 5737 } 5738 5739 /* uncomment for testing purposes */ 5740 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5741 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5742 { 5743 PetscErrorCode ierr; 5744 PC_IS* pcis = (PC_IS*)(pc->data); 5745 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5746 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5747 /* one and zero */ 5748 PetscScalar one=1.0,zero=0.0; 5749 /* space to store constraints and their local indices */ 5750 PetscScalar *constraints_data; 5751 PetscInt *constraints_idxs,*constraints_idxs_B; 5752 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5753 PetscInt *constraints_n; 5754 /* iterators */ 5755 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5756 /* BLAS integers */ 5757 PetscBLASInt lwork,lierr; 5758 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5759 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5760 /* reuse */ 5761 PetscInt olocal_primal_size,olocal_primal_size_cc; 5762 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5763 /* change of basis */ 5764 PetscBool qr_needed; 5765 PetscBT change_basis,qr_needed_idx; 5766 /* auxiliary stuff */ 5767 PetscInt *nnz,*is_indices; 5768 PetscInt ncc; 5769 /* some quantities */ 5770 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5771 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5772 PetscReal tol; /* tolerance for retaining eigenmodes */ 5773 5774 PetscFunctionBegin; 5775 tol = PetscSqrtReal(PETSC_SMALL); 5776 /* Destroy Mat objects computed previously */ 5777 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5778 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5779 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5780 /* save info on constraints from previous setup (if any) */ 5781 olocal_primal_size = pcbddc->local_primal_size; 5782 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5783 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5784 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5785 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5786 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5787 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5788 5789 if (!pcbddc->adaptive_selection) { 5790 IS ISForVertices,*ISForFaces,*ISForEdges; 5791 MatNullSpace nearnullsp; 5792 const Vec *nearnullvecs; 5793 Vec *localnearnullsp; 5794 PetscScalar *array; 5795 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5796 PetscBool nnsp_has_cnst; 5797 /* LAPACK working arrays for SVD or POD */ 5798 PetscBool skip_lapack,boolforchange; 5799 PetscScalar *work; 5800 PetscReal *singular_vals; 5801 #if defined(PETSC_USE_COMPLEX) 5802 PetscReal *rwork; 5803 #endif 5804 #if defined(PETSC_MISSING_LAPACK_GESVD) 5805 PetscScalar *temp_basis,*correlation_mat; 5806 #else 5807 PetscBLASInt dummy_int=1; 5808 PetscScalar dummy_scalar=1.; 5809 #endif 5810 5811 /* Get index sets for faces, edges and vertices from graph */ 5812 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5813 /* print some info */ 5814 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5815 PetscInt nv; 5816 5817 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5818 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5819 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5820 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5821 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5822 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5823 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5824 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5825 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5826 } 5827 5828 /* free unneeded index sets */ 5829 if (!pcbddc->use_vertices) { 5830 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5831 } 5832 if (!pcbddc->use_edges) { 5833 for (i=0;i<n_ISForEdges;i++) { 5834 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5835 } 5836 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5837 n_ISForEdges = 0; 5838 } 5839 if (!pcbddc->use_faces) { 5840 for (i=0;i<n_ISForFaces;i++) { 5841 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5842 } 5843 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5844 n_ISForFaces = 0; 5845 } 5846 5847 /* check if near null space is attached to global mat */ 5848 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5849 if (nearnullsp) { 5850 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5851 /* remove any stored info */ 5852 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5853 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5854 /* store information for BDDC solver reuse */ 5855 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5856 pcbddc->onearnullspace = nearnullsp; 5857 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5858 for (i=0;i<nnsp_size;i++) { 5859 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5860 } 5861 } else { /* if near null space is not provided BDDC uses constants by default */ 5862 nnsp_size = 0; 5863 nnsp_has_cnst = PETSC_TRUE; 5864 } 5865 /* get max number of constraints on a single cc */ 5866 max_constraints = nnsp_size; 5867 if (nnsp_has_cnst) max_constraints++; 5868 5869 /* 5870 Evaluate maximum storage size needed by the procedure 5871 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5872 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5873 There can be multiple constraints per connected component 5874 */ 5875 n_vertices = 0; 5876 if (ISForVertices) { 5877 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5878 } 5879 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5880 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5881 5882 total_counts = n_ISForFaces+n_ISForEdges; 5883 total_counts *= max_constraints; 5884 total_counts += n_vertices; 5885 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5886 5887 total_counts = 0; 5888 max_size_of_constraint = 0; 5889 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5890 IS used_is; 5891 if (i<n_ISForEdges) { 5892 used_is = ISForEdges[i]; 5893 } else { 5894 used_is = ISForFaces[i-n_ISForEdges]; 5895 } 5896 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5897 total_counts += j; 5898 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5899 } 5900 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5901 5902 /* get local part of global near null space vectors */ 5903 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5904 for (k=0;k<nnsp_size;k++) { 5905 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5906 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5907 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5908 } 5909 5910 /* whether or not to skip lapack calls */ 5911 skip_lapack = PETSC_TRUE; 5912 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5913 5914 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5915 if (!skip_lapack) { 5916 PetscScalar temp_work; 5917 5918 #if defined(PETSC_MISSING_LAPACK_GESVD) 5919 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5920 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5921 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5922 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5923 #if defined(PETSC_USE_COMPLEX) 5924 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5925 #endif 5926 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5927 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5928 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5929 lwork = -1; 5930 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5931 #if !defined(PETSC_USE_COMPLEX) 5932 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5933 #else 5934 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5935 #endif 5936 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5937 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5938 #else /* on missing GESVD */ 5939 /* SVD */ 5940 PetscInt max_n,min_n; 5941 max_n = max_size_of_constraint; 5942 min_n = max_constraints; 5943 if (max_size_of_constraint < max_constraints) { 5944 min_n = max_size_of_constraint; 5945 max_n = max_constraints; 5946 } 5947 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5948 #if defined(PETSC_USE_COMPLEX) 5949 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5950 #endif 5951 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5952 lwork = -1; 5953 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5954 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5955 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5956 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5957 #if !defined(PETSC_USE_COMPLEX) 5958 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5959 #else 5960 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5961 #endif 5962 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5963 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5964 #endif /* on missing GESVD */ 5965 /* Allocate optimal workspace */ 5966 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5967 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5968 } 5969 /* Now we can loop on constraining sets */ 5970 total_counts = 0; 5971 constraints_idxs_ptr[0] = 0; 5972 constraints_data_ptr[0] = 0; 5973 /* vertices */ 5974 if (n_vertices) { 5975 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5976 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5977 for (i=0;i<n_vertices;i++) { 5978 constraints_n[total_counts] = 1; 5979 constraints_data[total_counts] = 1.0; 5980 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5981 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5982 total_counts++; 5983 } 5984 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5985 n_vertices = total_counts; 5986 } 5987 5988 /* edges and faces */ 5989 total_counts_cc = total_counts; 5990 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5991 IS used_is; 5992 PetscBool idxs_copied = PETSC_FALSE; 5993 5994 if (ncc<n_ISForEdges) { 5995 used_is = ISForEdges[ncc]; 5996 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5997 } else { 5998 used_is = ISForFaces[ncc-n_ISForEdges]; 5999 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 6000 } 6001 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 6002 6003 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 6004 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6005 /* change of basis should not be performed on local periodic nodes */ 6006 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6007 if (nnsp_has_cnst) { 6008 PetscScalar quad_value; 6009 6010 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6011 idxs_copied = PETSC_TRUE; 6012 6013 if (!pcbddc->use_nnsp_true) { 6014 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6015 } else { 6016 quad_value = 1.0; 6017 } 6018 for (j=0;j<size_of_constraint;j++) { 6019 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6020 } 6021 temp_constraints++; 6022 total_counts++; 6023 } 6024 for (k=0;k<nnsp_size;k++) { 6025 PetscReal real_value; 6026 PetscScalar *ptr_to_data; 6027 6028 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6029 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6030 for (j=0;j<size_of_constraint;j++) { 6031 ptr_to_data[j] = array[is_indices[j]]; 6032 } 6033 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6034 /* check if array is null on the connected component */ 6035 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6036 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6037 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6038 temp_constraints++; 6039 total_counts++; 6040 if (!idxs_copied) { 6041 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6042 idxs_copied = PETSC_TRUE; 6043 } 6044 } 6045 } 6046 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6047 valid_constraints = temp_constraints; 6048 if (!pcbddc->use_nnsp_true && temp_constraints) { 6049 if (temp_constraints == 1) { /* just normalize the constraint */ 6050 PetscScalar norm,*ptr_to_data; 6051 6052 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6053 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6054 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6055 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6056 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6057 } else { /* perform SVD */ 6058 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6059 6060 #if defined(PETSC_MISSING_LAPACK_GESVD) 6061 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6062 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6063 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6064 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6065 from that computed using LAPACKgesvd 6066 -> This is due to a different computation of eigenvectors in LAPACKheev 6067 -> The quality of the POD-computed basis will be the same */ 6068 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6069 /* Store upper triangular part of correlation matrix */ 6070 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6071 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6072 for (j=0;j<temp_constraints;j++) { 6073 for (k=0;k<j+1;k++) { 6074 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6075 } 6076 } 6077 /* compute eigenvalues and eigenvectors of correlation matrix */ 6078 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6079 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6080 #if !defined(PETSC_USE_COMPLEX) 6081 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6082 #else 6083 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6084 #endif 6085 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6086 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6087 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6088 j = 0; 6089 while (j < temp_constraints && singular_vals[j]/singular_vals[temp_constraints-1] < tol) j++; 6090 total_counts = total_counts-j; 6091 valid_constraints = temp_constraints-j; 6092 /* scale and copy POD basis into used quadrature memory */ 6093 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6094 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6095 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6096 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6097 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6098 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6099 if (j<temp_constraints) { 6100 PetscInt ii; 6101 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6102 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6103 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6104 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6105 for (k=0;k<temp_constraints-j;k++) { 6106 for (ii=0;ii<size_of_constraint;ii++) { 6107 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6108 } 6109 } 6110 } 6111 #else /* on missing GESVD */ 6112 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6113 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6114 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6115 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6116 #if !defined(PETSC_USE_COMPLEX) 6117 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6118 #else 6119 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6120 #endif 6121 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6122 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6123 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6124 k = temp_constraints; 6125 if (k > size_of_constraint) k = size_of_constraint; 6126 j = 0; 6127 while (j < k && singular_vals[k-j-1]/singular_vals[0] < tol) j++; 6128 valid_constraints = k-j; 6129 total_counts = total_counts-temp_constraints+valid_constraints; 6130 #endif /* on missing GESVD */ 6131 } 6132 } 6133 /* update pointers information */ 6134 if (valid_constraints) { 6135 constraints_n[total_counts_cc] = valid_constraints; 6136 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6137 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6138 /* set change_of_basis flag */ 6139 if (boolforchange) { 6140 PetscBTSet(change_basis,total_counts_cc); 6141 } 6142 total_counts_cc++; 6143 } 6144 } 6145 /* free workspace */ 6146 if (!skip_lapack) { 6147 ierr = PetscFree(work);CHKERRQ(ierr); 6148 #if defined(PETSC_USE_COMPLEX) 6149 ierr = PetscFree(rwork);CHKERRQ(ierr); 6150 #endif 6151 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6152 #if defined(PETSC_MISSING_LAPACK_GESVD) 6153 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6154 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6155 #endif 6156 } 6157 for (k=0;k<nnsp_size;k++) { 6158 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6159 } 6160 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6161 /* free index sets of faces, edges and vertices */ 6162 for (i=0;i<n_ISForFaces;i++) { 6163 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6164 } 6165 if (n_ISForFaces) { 6166 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6167 } 6168 for (i=0;i<n_ISForEdges;i++) { 6169 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6170 } 6171 if (n_ISForEdges) { 6172 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6173 } 6174 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6175 } else { 6176 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6177 6178 total_counts = 0; 6179 n_vertices = 0; 6180 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6181 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6182 } 6183 max_constraints = 0; 6184 total_counts_cc = 0; 6185 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6186 total_counts += pcbddc->adaptive_constraints_n[i]; 6187 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6188 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6189 } 6190 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6191 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6192 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6193 constraints_data = pcbddc->adaptive_constraints_data; 6194 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6195 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6196 total_counts_cc = 0; 6197 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6198 if (pcbddc->adaptive_constraints_n[i]) { 6199 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6200 } 6201 } 6202 6203 max_size_of_constraint = 0; 6204 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6205 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6206 /* Change of basis */ 6207 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6208 if (pcbddc->use_change_of_basis) { 6209 for (i=0;i<sub_schurs->n_subs;i++) { 6210 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6211 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6212 } 6213 } 6214 } 6215 } 6216 pcbddc->local_primal_size = total_counts; 6217 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6218 6219 /* map constraints_idxs in boundary numbering */ 6220 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6221 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D",constraints_idxs_ptr[total_counts_cc],i); 6222 6223 /* Create constraint matrix */ 6224 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6225 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6226 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6227 6228 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6229 /* determine if a QR strategy is needed for change of basis */ 6230 qr_needed = pcbddc->use_qr_single; 6231 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6232 total_primal_vertices=0; 6233 pcbddc->local_primal_size_cc = 0; 6234 for (i=0;i<total_counts_cc;i++) { 6235 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6236 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6237 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6238 pcbddc->local_primal_size_cc += 1; 6239 } else if (PetscBTLookup(change_basis,i)) { 6240 for (k=0;k<constraints_n[i];k++) { 6241 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6242 } 6243 pcbddc->local_primal_size_cc += constraints_n[i]; 6244 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6245 PetscBTSet(qr_needed_idx,i); 6246 qr_needed = PETSC_TRUE; 6247 } 6248 } else { 6249 pcbddc->local_primal_size_cc += 1; 6250 } 6251 } 6252 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6253 pcbddc->n_vertices = total_primal_vertices; 6254 /* permute indices in order to have a sorted set of vertices */ 6255 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6256 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6257 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6258 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6259 6260 /* nonzero structure of constraint matrix */ 6261 /* and get reference dof for local constraints */ 6262 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6263 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6264 6265 j = total_primal_vertices; 6266 total_counts = total_primal_vertices; 6267 cum = total_primal_vertices; 6268 for (i=n_vertices;i<total_counts_cc;i++) { 6269 if (!PetscBTLookup(change_basis,i)) { 6270 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6271 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6272 cum++; 6273 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6274 for (k=0;k<constraints_n[i];k++) { 6275 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6276 nnz[j+k] = size_of_constraint; 6277 } 6278 j += constraints_n[i]; 6279 } 6280 } 6281 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6282 ierr = MatSetOption(pcbddc->ConstraintMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6283 ierr = PetscFree(nnz);CHKERRQ(ierr); 6284 6285 /* set values in constraint matrix */ 6286 for (i=0;i<total_primal_vertices;i++) { 6287 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6288 } 6289 total_counts = total_primal_vertices; 6290 for (i=n_vertices;i<total_counts_cc;i++) { 6291 if (!PetscBTLookup(change_basis,i)) { 6292 PetscInt *cols; 6293 6294 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6295 cols = constraints_idxs+constraints_idxs_ptr[i]; 6296 for (k=0;k<constraints_n[i];k++) { 6297 PetscInt row = total_counts+k; 6298 PetscScalar *vals; 6299 6300 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6301 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6302 } 6303 total_counts += constraints_n[i]; 6304 } 6305 } 6306 /* assembling */ 6307 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6308 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6309 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6310 6311 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6312 if (pcbddc->use_change_of_basis) { 6313 /* dual and primal dofs on a single cc */ 6314 PetscInt dual_dofs,primal_dofs; 6315 /* working stuff for GEQRF */ 6316 PetscScalar *qr_basis = NULL,*qr_tau = NULL,*qr_work = NULL,lqr_work_t; 6317 PetscBLASInt lqr_work; 6318 /* working stuff for UNGQR */ 6319 PetscScalar *gqr_work = NULL,lgqr_work_t; 6320 PetscBLASInt lgqr_work; 6321 /* working stuff for TRTRS */ 6322 PetscScalar *trs_rhs = NULL; 6323 PetscBLASInt Blas_NRHS; 6324 /* pointers for values insertion into change of basis matrix */ 6325 PetscInt *start_rows,*start_cols; 6326 PetscScalar *start_vals; 6327 /* working stuff for values insertion */ 6328 PetscBT is_primal; 6329 PetscInt *aux_primal_numbering_B; 6330 /* matrix sizes */ 6331 PetscInt global_size,local_size; 6332 /* temporary change of basis */ 6333 Mat localChangeOfBasisMatrix; 6334 /* extra space for debugging */ 6335 PetscScalar *dbg_work = NULL; 6336 6337 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6338 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6339 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6340 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6341 /* nonzeros for local mat */ 6342 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6343 if (!pcbddc->benign_change || pcbddc->fake_change) { 6344 for (i=0;i<pcis->n;i++) nnz[i]=1; 6345 } else { 6346 const PetscInt *ii; 6347 PetscInt n; 6348 PetscBool flg_row; 6349 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6350 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6351 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6352 } 6353 for (i=n_vertices;i<total_counts_cc;i++) { 6354 if (PetscBTLookup(change_basis,i)) { 6355 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6356 if (PetscBTLookup(qr_needed_idx,i)) { 6357 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6358 } else { 6359 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6360 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6361 } 6362 } 6363 } 6364 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6365 ierr = MatSetOption(localChangeOfBasisMatrix,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_TRUE);CHKERRQ(ierr); 6366 ierr = PetscFree(nnz);CHKERRQ(ierr); 6367 /* Set interior change in the matrix */ 6368 if (!pcbddc->benign_change || pcbddc->fake_change) { 6369 for (i=0;i<pcis->n;i++) { 6370 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6371 } 6372 } else { 6373 const PetscInt *ii,*jj; 6374 PetscScalar *aa; 6375 PetscInt n; 6376 PetscBool flg_row; 6377 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6378 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6379 for (i=0;i<n;i++) { 6380 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6381 } 6382 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6383 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6384 } 6385 6386 if (pcbddc->dbg_flag) { 6387 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6389 } 6390 6391 6392 /* Now we loop on the constraints which need a change of basis */ 6393 /* 6394 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6395 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6396 6397 Basic blocks of change of basis matrix T computed by 6398 6399 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6400 6401 | 1 0 ... 0 s_1/S | 6402 | 0 1 ... 0 s_2/S | 6403 | ... | 6404 | 0 ... 1 s_{n-1}/S | 6405 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6406 6407 with S = \sum_{i=1}^n s_i^2 6408 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6409 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6410 6411 - QR decomposition of constraints otherwise 6412 */ 6413 if (qr_needed && max_size_of_constraint) { 6414 /* space to store Q */ 6415 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6416 /* array to store scaling factors for reflectors */ 6417 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6418 /* first we issue queries for optimal work */ 6419 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6420 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6421 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6422 lqr_work = -1; 6423 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6424 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6425 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6426 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6427 lgqr_work = -1; 6428 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6429 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6430 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6431 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6432 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6433 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6434 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6435 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6436 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6437 /* array to store rhs and solution of triangular solver */ 6438 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6439 /* allocating workspace for check */ 6440 if (pcbddc->dbg_flag) { 6441 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6442 } 6443 } 6444 /* array to store whether a node is primal or not */ 6445 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6446 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6447 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6448 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D",total_primal_vertices,i); 6449 for (i=0;i<total_primal_vertices;i++) { 6450 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6451 } 6452 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6453 6454 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6455 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6456 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6457 if (PetscBTLookup(change_basis,total_counts)) { 6458 /* get constraint info */ 6459 primal_dofs = constraints_n[total_counts]; 6460 dual_dofs = size_of_constraint-primal_dofs; 6461 6462 if (pcbddc->dbg_flag) { 6463 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %D: %D need a change of basis (size %D)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6464 } 6465 6466 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6467 6468 /* copy quadrature constraints for change of basis check */ 6469 if (pcbddc->dbg_flag) { 6470 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6471 } 6472 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6473 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6474 6475 /* compute QR decomposition of constraints */ 6476 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6477 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6478 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6479 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6480 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6481 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6482 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6483 6484 /* explictly compute R^-T */ 6485 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6486 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6487 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6488 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6489 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6490 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6491 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6492 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6493 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6494 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6495 6496 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6497 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6498 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6499 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6500 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6501 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6502 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6503 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6504 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6505 6506 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6507 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6508 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6509 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6510 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6511 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6512 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6513 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6515 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6516 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6517 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6518 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6519 6520 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6521 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6522 /* insert cols for primal dofs */ 6523 for (j=0;j<primal_dofs;j++) { 6524 start_vals = &qr_basis[j*size_of_constraint]; 6525 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6526 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6527 } 6528 /* insert cols for dual dofs */ 6529 for (j=0,k=0;j<dual_dofs;k++) { 6530 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6531 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6532 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6533 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6534 j++; 6535 } 6536 } 6537 6538 /* check change of basis */ 6539 if (pcbddc->dbg_flag) { 6540 PetscInt ii,jj; 6541 PetscBool valid_qr=PETSC_TRUE; 6542 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6543 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6544 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6545 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6546 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6547 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6548 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6549 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6550 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6551 for (jj=0;jj<size_of_constraint;jj++) { 6552 for (ii=0;ii<primal_dofs;ii++) { 6553 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6554 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6555 } 6556 } 6557 if (!valid_qr) { 6558 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6559 for (jj=0;jj<size_of_constraint;jj++) { 6560 for (ii=0;ii<primal_dofs;ii++) { 6561 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6562 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not orthogonal to constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6563 } 6564 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6565 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %D is not unitary w.r.t constraint %D (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]));CHKERRQ(ierr); 6566 } 6567 } 6568 } 6569 } else { 6570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6571 } 6572 } 6573 } else { /* simple transformation block */ 6574 PetscInt row,col; 6575 PetscScalar val,norm; 6576 6577 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6578 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6579 for (j=0;j<size_of_constraint;j++) { 6580 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6581 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6582 if (!PetscBTLookup(is_primal,row_B)) { 6583 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6584 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6585 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6586 } else { 6587 for (k=0;k<size_of_constraint;k++) { 6588 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6589 if (row != col) { 6590 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6591 } else { 6592 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6593 } 6594 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6595 } 6596 } 6597 } 6598 if (pcbddc->dbg_flag) { 6599 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6600 } 6601 } 6602 } else { 6603 if (pcbddc->dbg_flag) { 6604 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %D does not need a change of basis (size %D)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6605 } 6606 } 6607 } 6608 6609 /* free workspace */ 6610 if (qr_needed) { 6611 if (pcbddc->dbg_flag) { 6612 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6613 } 6614 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6615 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6616 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6617 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6618 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6619 } 6620 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6621 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6622 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6623 6624 /* assembling of global change of variable */ 6625 if (!pcbddc->fake_change) { 6626 Mat tmat; 6627 PetscInt bs; 6628 6629 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6630 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6631 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6632 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6633 ierr = MatAssemblyBegin(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6634 ierr = MatAssemblyEnd(tmat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6635 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6636 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6637 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6638 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6639 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6640 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6641 ierr = MatConvert(tmat,MATAIJ,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6642 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6643 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6644 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6645 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6646 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6647 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6648 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6649 6650 /* check */ 6651 if (pcbddc->dbg_flag) { 6652 PetscReal error; 6653 Vec x,x_change; 6654 6655 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6656 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6657 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6658 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6659 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6660 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6661 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6662 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6663 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6664 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6665 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6666 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6667 if (error > PETSC_SMALL) { 6668 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e",error); 6669 } 6670 ierr = VecDestroy(&x);CHKERRQ(ierr); 6671 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6672 } 6673 /* adapt sub_schurs computed (if any) */ 6674 if (pcbddc->use_deluxe_scaling) { 6675 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6676 6677 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6678 if (sub_schurs && sub_schurs->S_Ej_all) { 6679 Mat S_new,tmat; 6680 IS is_all_N,is_V_Sall = NULL; 6681 6682 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6683 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6684 if (pcbddc->deluxe_zerorows) { 6685 ISLocalToGlobalMapping NtoSall; 6686 IS is_V; 6687 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6688 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6689 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6690 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6691 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6692 } 6693 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6694 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6695 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6696 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6697 if (pcbddc->deluxe_zerorows) { 6698 const PetscScalar *array; 6699 const PetscInt *idxs_V,*idxs_all; 6700 PetscInt i,n_V; 6701 6702 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6703 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6704 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6705 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6706 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6707 for (i=0;i<n_V;i++) { 6708 PetscScalar val; 6709 PetscInt idx; 6710 6711 idx = idxs_V[i]; 6712 val = array[idxs_all[idxs_V[i]]]; 6713 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6714 } 6715 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6716 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6717 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6718 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6719 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6720 } 6721 sub_schurs->S_Ej_all = S_new; 6722 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6723 if (sub_schurs->sum_S_Ej_all) { 6724 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6725 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6726 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6727 if (pcbddc->deluxe_zerorows) { 6728 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6729 } 6730 sub_schurs->sum_S_Ej_all = S_new; 6731 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6732 } 6733 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6734 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6735 } 6736 /* destroy any change of basis context in sub_schurs */ 6737 if (sub_schurs && sub_schurs->change) { 6738 PetscInt i; 6739 6740 for (i=0;i<sub_schurs->n_subs;i++) { 6741 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6742 } 6743 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6744 } 6745 } 6746 if (pcbddc->switch_static) { /* need to save the local change */ 6747 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6748 } else { 6749 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6750 } 6751 /* determine if any process has changed the pressures locally */ 6752 pcbddc->change_interior = pcbddc->benign_have_null; 6753 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6754 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6755 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6756 pcbddc->use_qr_single = qr_needed; 6757 } 6758 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6759 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6760 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6761 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6762 } else { 6763 Mat benign_global = NULL; 6764 if (pcbddc->benign_have_null) { 6765 Mat M; 6766 6767 pcbddc->change_interior = PETSC_TRUE; 6768 ierr = VecCopy(matis->counter,pcis->vec1_N);CHKERRQ(ierr); 6769 ierr = VecReciprocal(pcis->vec1_N);CHKERRQ(ierr); 6770 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&benign_global);CHKERRQ(ierr); 6771 if (pcbddc->benign_change) { 6772 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6773 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6774 } else { 6775 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&M);CHKERRQ(ierr); 6776 ierr = MatDiagonalSet(M,pcis->vec1_N,INSERT_VALUES);CHKERRQ(ierr); 6777 } 6778 ierr = MatISSetLocalMat(benign_global,M);CHKERRQ(ierr); 6779 ierr = MatDestroy(&M);CHKERRQ(ierr); 6780 ierr = MatAssemblyBegin(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6781 ierr = MatAssemblyEnd(benign_global,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6782 } 6783 if (pcbddc->user_ChangeOfBasisMatrix) { 6784 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6785 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6786 } else if (pcbddc->benign_have_null) { 6787 pcbddc->ChangeOfBasisMatrix = benign_global; 6788 } 6789 } 6790 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6791 IS is_global; 6792 const PetscInt *gidxs; 6793 6794 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6795 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6796 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6797 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6798 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6799 } 6800 } 6801 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6802 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6803 } 6804 6805 if (!pcbddc->fake_change) { 6806 /* add pressure dofs to set of primal nodes for numbering purposes */ 6807 for (i=0;i<pcbddc->benign_n;i++) { 6808 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6809 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6810 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6811 pcbddc->local_primal_size_cc++; 6812 pcbddc->local_primal_size++; 6813 } 6814 6815 /* check if a new primal space has been introduced (also take into account benign trick) */ 6816 pcbddc->new_primal_space_local = PETSC_TRUE; 6817 if (olocal_primal_size == pcbddc->local_primal_size) { 6818 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6819 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6820 if (!pcbddc->new_primal_space_local) { 6821 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6822 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6823 } 6824 } 6825 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6826 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6827 } 6828 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6829 6830 /* flush dbg viewer */ 6831 if (pcbddc->dbg_flag) { 6832 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6833 } 6834 6835 /* free workspace */ 6836 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6837 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6838 if (!pcbddc->adaptive_selection) { 6839 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6840 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6841 } else { 6842 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6843 pcbddc->adaptive_constraints_idxs_ptr, 6844 pcbddc->adaptive_constraints_data_ptr, 6845 pcbddc->adaptive_constraints_idxs, 6846 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6847 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6848 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6849 } 6850 PetscFunctionReturn(0); 6851 } 6852 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6853 6854 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6855 { 6856 ISLocalToGlobalMapping map; 6857 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6858 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6859 PetscInt i,N; 6860 PetscBool rcsr = PETSC_FALSE; 6861 PetscErrorCode ierr; 6862 6863 PetscFunctionBegin; 6864 if (pcbddc->recompute_topography) { 6865 pcbddc->graphanalyzed = PETSC_FALSE; 6866 /* Reset previously computed graph */ 6867 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6868 /* Init local Graph struct */ 6869 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6870 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6871 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6872 6873 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6874 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6875 } 6876 /* Check validity of the csr graph passed in by the user */ 6877 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6878 6879 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6880 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6881 PetscInt *xadj,*adjncy; 6882 PetscInt nvtxs; 6883 PetscBool flg_row=PETSC_FALSE; 6884 6885 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6886 if (flg_row) { 6887 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6888 pcbddc->computed_rowadj = PETSC_TRUE; 6889 } 6890 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6891 rcsr = PETSC_TRUE; 6892 } 6893 if (pcbddc->dbg_flag) { 6894 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6895 } 6896 6897 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6898 PetscReal *lcoords; 6899 PetscInt n; 6900 MPI_Datatype dimrealtype; 6901 6902 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6903 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6904 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6905 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6906 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6907 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6908 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6909 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6910 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6911 6912 pcbddc->mat_graph->coords = lcoords; 6913 pcbddc->mat_graph->cloc = PETSC_TRUE; 6914 pcbddc->mat_graph->cnloc = n; 6915 } 6916 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6917 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6918 6919 /* Setup of Graph */ 6920 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6921 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6922 6923 /* attach info on disconnected subdomains if present */ 6924 if (pcbddc->n_local_subs) { 6925 PetscInt *local_subs; 6926 6927 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6928 for (i=0;i<pcbddc->n_local_subs;i++) { 6929 const PetscInt *idxs; 6930 PetscInt nl,j; 6931 6932 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6933 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6934 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6935 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6936 } 6937 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6938 pcbddc->mat_graph->local_subs = local_subs; 6939 } 6940 } 6941 6942 if (!pcbddc->graphanalyzed) { 6943 /* Graph's connected components analysis */ 6944 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6945 pcbddc->graphanalyzed = PETSC_TRUE; 6946 } 6947 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6948 PetscFunctionReturn(0); 6949 } 6950 6951 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6952 { 6953 PetscInt i,j; 6954 PetscScalar *alphas; 6955 PetscErrorCode ierr; 6956 6957 PetscFunctionBegin; 6958 if (!n) PetscFunctionReturn(0); 6959 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6960 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6961 for (i=1;i<n;i++) { 6962 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6963 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6964 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6965 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6966 } 6967 ierr = PetscFree(alphas);CHKERRQ(ierr); 6968 PetscFunctionReturn(0); 6969 } 6970 6971 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6972 { 6973 Mat A; 6974 PetscInt n_neighs,*neighs,*n_shared,**shared; 6975 PetscMPIInt size,rank,color; 6976 PetscInt *xadj,*adjncy; 6977 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6978 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6979 PetscInt void_procs,*procs_candidates = NULL; 6980 PetscInt xadj_count,*count; 6981 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6982 PetscSubcomm psubcomm; 6983 MPI_Comm subcomm; 6984 PetscErrorCode ierr; 6985 6986 PetscFunctionBegin; 6987 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6988 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6989 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6990 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6991 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6992 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %D",*n_subdomains); 6993 6994 if (have_void) *have_void = PETSC_FALSE; 6995 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6996 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6997 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6998 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6999 im_active = !!n; 7000 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7001 void_procs = size - active_procs; 7002 /* get ranks of of non-active processes in mat communicator */ 7003 if (void_procs) { 7004 PetscInt ncand; 7005 7006 if (have_void) *have_void = PETSC_TRUE; 7007 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7008 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7009 for (i=0,ncand=0;i<size;i++) { 7010 if (!procs_candidates[i]) { 7011 procs_candidates[ncand++] = i; 7012 } 7013 } 7014 /* force n_subdomains to be not greater that the number of non-active processes */ 7015 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7016 } 7017 7018 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7019 number of subdomains requested 1 -> send to master or first candidate in voids */ 7020 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7021 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7022 PetscInt issize,isidx,dest; 7023 if (*n_subdomains == 1) dest = 0; 7024 else dest = rank; 7025 if (im_active) { 7026 issize = 1; 7027 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7028 isidx = procs_candidates[dest]; 7029 } else { 7030 isidx = dest; 7031 } 7032 } else { 7033 issize = 0; 7034 isidx = -1; 7035 } 7036 if (*n_subdomains != 1) *n_subdomains = active_procs; 7037 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7038 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7039 PetscFunctionReturn(0); 7040 } 7041 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7042 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7043 threshold = PetscMax(threshold,2); 7044 7045 /* Get info on mapping */ 7046 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7047 7048 /* build local CSR graph of subdomains' connectivity */ 7049 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7050 xadj[0] = 0; 7051 xadj[1] = PetscMax(n_neighs-1,0); 7052 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7053 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7054 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7055 for (i=1;i<n_neighs;i++) 7056 for (j=0;j<n_shared[i];j++) 7057 count[shared[i][j]] += 1; 7058 7059 xadj_count = 0; 7060 for (i=1;i<n_neighs;i++) { 7061 for (j=0;j<n_shared[i];j++) { 7062 if (count[shared[i][j]] < threshold) { 7063 adjncy[xadj_count] = neighs[i]; 7064 adjncy_wgt[xadj_count] = n_shared[i]; 7065 xadj_count++; 7066 break; 7067 } 7068 } 7069 } 7070 xadj[1] = xadj_count; 7071 ierr = PetscFree(count);CHKERRQ(ierr); 7072 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7073 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7074 7075 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7076 7077 /* Restrict work on active processes only */ 7078 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7079 if (void_procs) { 7080 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7081 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7082 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7083 subcomm = PetscSubcommChild(psubcomm); 7084 } else { 7085 psubcomm = NULL; 7086 subcomm = PetscObjectComm((PetscObject)mat); 7087 } 7088 7089 v_wgt = NULL; 7090 if (!color) { 7091 ierr = PetscFree(xadj);CHKERRQ(ierr); 7092 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7093 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7094 } else { 7095 Mat subdomain_adj; 7096 IS new_ranks,new_ranks_contig; 7097 MatPartitioning partitioner; 7098 PetscInt rstart=0,rend=0; 7099 PetscInt *is_indices,*oldranks; 7100 PetscMPIInt size; 7101 PetscBool aggregate; 7102 7103 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7104 if (void_procs) { 7105 PetscInt prank = rank; 7106 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7107 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7108 for (i=0;i<xadj[1];i++) { 7109 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7110 } 7111 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7112 } else { 7113 oldranks = NULL; 7114 } 7115 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7116 if (aggregate) { /* TODO: all this part could be made more efficient */ 7117 PetscInt lrows,row,ncols,*cols; 7118 PetscMPIInt nrank; 7119 PetscScalar *vals; 7120 7121 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7122 lrows = 0; 7123 if (nrank<redprocs) { 7124 lrows = size/redprocs; 7125 if (nrank<size%redprocs) lrows++; 7126 } 7127 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7128 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7129 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7130 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7131 row = nrank; 7132 ncols = xadj[1]-xadj[0]; 7133 cols = adjncy; 7134 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7135 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7136 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7137 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7138 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7139 ierr = PetscFree(xadj);CHKERRQ(ierr); 7140 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7141 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7142 ierr = PetscFree(vals);CHKERRQ(ierr); 7143 if (use_vwgt) { 7144 Vec v; 7145 const PetscScalar *array; 7146 PetscInt nl; 7147 7148 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7149 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7150 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7151 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7152 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7153 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7154 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7155 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7156 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7157 ierr = VecDestroy(&v);CHKERRQ(ierr); 7158 } 7159 } else { 7160 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7161 if (use_vwgt) { 7162 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7163 v_wgt[0] = n; 7164 } 7165 } 7166 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7167 7168 /* Partition */ 7169 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7170 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7171 if (v_wgt) { 7172 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7173 } 7174 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7175 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7176 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7177 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7178 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7179 7180 /* renumber new_ranks to avoid "holes" in new set of processors */ 7181 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7182 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7183 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7184 if (!aggregate) { 7185 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7186 #if defined(PETSC_USE_DEBUG) 7187 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7188 #endif 7189 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7190 } else if (oldranks) { 7191 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7192 } else { 7193 ranks_send_to_idx[0] = is_indices[0]; 7194 } 7195 } else { 7196 PetscInt idx = 0; 7197 PetscMPIInt tag; 7198 MPI_Request *reqs; 7199 7200 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7201 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7202 for (i=rstart;i<rend;i++) { 7203 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7204 } 7205 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7206 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7207 ierr = PetscFree(reqs);CHKERRQ(ierr); 7208 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7209 #if defined(PETSC_USE_DEBUG) 7210 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7211 #endif 7212 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7213 } else if (oldranks) { 7214 ranks_send_to_idx[0] = oldranks[idx]; 7215 } else { 7216 ranks_send_to_idx[0] = idx; 7217 } 7218 } 7219 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7220 /* clean up */ 7221 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7222 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7223 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7224 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7225 } 7226 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7227 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7228 7229 /* assemble parallel IS for sends */ 7230 i = 1; 7231 if (!color) i=0; 7232 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7233 PetscFunctionReturn(0); 7234 } 7235 7236 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7237 7238 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7239 { 7240 Mat local_mat; 7241 IS is_sends_internal; 7242 PetscInt rows,cols,new_local_rows; 7243 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7244 PetscBool ismatis,isdense,newisdense,destroy_mat; 7245 ISLocalToGlobalMapping l2gmap; 7246 PetscInt* l2gmap_indices; 7247 const PetscInt* is_indices; 7248 MatType new_local_type; 7249 /* buffers */ 7250 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7251 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7252 PetscInt *recv_buffer_idxs_local; 7253 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7254 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7255 /* MPI */ 7256 MPI_Comm comm,comm_n; 7257 PetscSubcomm subcomm; 7258 PetscMPIInt n_sends,n_recvs,size; 7259 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7260 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7261 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7262 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7263 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7264 PetscErrorCode ierr; 7265 7266 PetscFunctionBegin; 7267 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7268 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7269 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7270 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7271 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7272 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7273 PetscValidLogicalCollectiveBool(mat,reuse,6); 7274 PetscValidLogicalCollectiveInt(mat,nis,8); 7275 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7276 if (nvecs) { 7277 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7278 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7279 } 7280 /* further checks */ 7281 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7282 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7283 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7284 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7285 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7286 if (reuse && *mat_n) { 7287 PetscInt mrows,mcols,mnrows,mncols; 7288 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7289 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7290 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7291 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7292 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7293 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7294 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7295 } 7296 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7297 PetscValidLogicalCollectiveInt(mat,bs,0); 7298 7299 /* prepare IS for sending if not provided */ 7300 if (!is_sends) { 7301 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7302 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7303 } else { 7304 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7305 is_sends_internal = is_sends; 7306 } 7307 7308 /* get comm */ 7309 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7310 7311 /* compute number of sends */ 7312 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7313 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7314 7315 /* compute number of receives */ 7316 ierr = MPI_Comm_size(comm,&size);CHKERRQ(ierr); 7317 ierr = PetscMalloc1(size,&iflags);CHKERRQ(ierr); 7318 ierr = PetscMemzero(iflags,size*sizeof(*iflags));CHKERRQ(ierr); 7319 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7320 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7321 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7322 ierr = PetscFree(iflags);CHKERRQ(ierr); 7323 7324 /* restrict comm if requested */ 7325 subcomm = 0; 7326 destroy_mat = PETSC_FALSE; 7327 if (restrict_comm) { 7328 PetscMPIInt color,subcommsize; 7329 7330 color = 0; 7331 if (restrict_full) { 7332 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7333 } else { 7334 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7335 } 7336 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7337 subcommsize = size - subcommsize; 7338 /* check if reuse has been requested */ 7339 if (reuse) { 7340 if (*mat_n) { 7341 PetscMPIInt subcommsize2; 7342 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7343 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7344 comm_n = PetscObjectComm((PetscObject)*mat_n); 7345 } else { 7346 comm_n = PETSC_COMM_SELF; 7347 } 7348 } else { /* MAT_INITIAL_MATRIX */ 7349 PetscMPIInt rank; 7350 7351 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7352 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7353 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7354 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7355 comm_n = PetscSubcommChild(subcomm); 7356 } 7357 /* flag to destroy *mat_n if not significative */ 7358 if (color) destroy_mat = PETSC_TRUE; 7359 } else { 7360 comm_n = comm; 7361 } 7362 7363 /* prepare send/receive buffers */ 7364 ierr = PetscMalloc1(size,&ilengths_idxs);CHKERRQ(ierr); 7365 ierr = PetscMemzero(ilengths_idxs,size*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7366 ierr = PetscMalloc1(size,&ilengths_vals);CHKERRQ(ierr); 7367 ierr = PetscMemzero(ilengths_vals,size*sizeof(*ilengths_vals));CHKERRQ(ierr); 7368 if (nis) { 7369 ierr = PetscCalloc1(size,&ilengths_idxs_is);CHKERRQ(ierr); 7370 } 7371 7372 /* Get data from local matrices */ 7373 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7374 /* TODO: See below some guidelines on how to prepare the local buffers */ 7375 /* 7376 send_buffer_vals should contain the raw values of the local matrix 7377 send_buffer_idxs should contain: 7378 - MatType_PRIVATE type 7379 - PetscInt size_of_l2gmap 7380 - PetscInt global_row_indices[size_of_l2gmap] 7381 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7382 */ 7383 else { 7384 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7385 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7386 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7387 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7388 send_buffer_idxs[1] = i; 7389 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7390 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7391 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7392 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7393 for (i=0;i<n_sends;i++) { 7394 ilengths_vals[is_indices[i]] = len*len; 7395 ilengths_idxs[is_indices[i]] = len+2; 7396 } 7397 } 7398 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7399 /* additional is (if any) */ 7400 if (nis) { 7401 PetscMPIInt psum; 7402 PetscInt j; 7403 for (j=0,psum=0;j<nis;j++) { 7404 PetscInt plen; 7405 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7406 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7407 psum += len+1; /* indices + lenght */ 7408 } 7409 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7410 for (j=0,psum=0;j<nis;j++) { 7411 PetscInt plen; 7412 const PetscInt *is_array_idxs; 7413 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7414 send_buffer_idxs_is[psum] = plen; 7415 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7416 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7417 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7418 psum += plen+1; /* indices + lenght */ 7419 } 7420 for (i=0;i<n_sends;i++) { 7421 ilengths_idxs_is[is_indices[i]] = psum; 7422 } 7423 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7424 } 7425 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7426 7427 buf_size_idxs = 0; 7428 buf_size_vals = 0; 7429 buf_size_idxs_is = 0; 7430 buf_size_vecs = 0; 7431 for (i=0;i<n_recvs;i++) { 7432 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7433 buf_size_vals += (PetscInt)olengths_vals[i]; 7434 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7435 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7436 } 7437 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7438 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7439 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7440 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7441 7442 /* get new tags for clean communications */ 7443 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7444 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7445 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7446 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7447 7448 /* allocate for requests */ 7449 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7450 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7451 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7452 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7453 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7454 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7455 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7456 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7457 7458 /* communications */ 7459 ptr_idxs = recv_buffer_idxs; 7460 ptr_vals = recv_buffer_vals; 7461 ptr_idxs_is = recv_buffer_idxs_is; 7462 ptr_vecs = recv_buffer_vecs; 7463 for (i=0;i<n_recvs;i++) { 7464 source_dest = onodes[i]; 7465 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7466 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7467 ptr_idxs += olengths_idxs[i]; 7468 ptr_vals += olengths_vals[i]; 7469 if (nis) { 7470 source_dest = onodes_is[i]; 7471 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7472 ptr_idxs_is += olengths_idxs_is[i]; 7473 } 7474 if (nvecs) { 7475 source_dest = onodes[i]; 7476 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7477 ptr_vecs += olengths_idxs[i]-2; 7478 } 7479 } 7480 for (i=0;i<n_sends;i++) { 7481 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7482 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7483 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7484 if (nis) { 7485 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7486 } 7487 if (nvecs) { 7488 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7489 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7490 } 7491 } 7492 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7493 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7494 7495 /* assemble new l2g map */ 7496 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7497 ptr_idxs = recv_buffer_idxs; 7498 new_local_rows = 0; 7499 for (i=0;i<n_recvs;i++) { 7500 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7501 ptr_idxs += olengths_idxs[i]; 7502 } 7503 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7504 ptr_idxs = recv_buffer_idxs; 7505 new_local_rows = 0; 7506 for (i=0;i<n_recvs;i++) { 7507 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7508 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7509 ptr_idxs += olengths_idxs[i]; 7510 } 7511 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7512 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7513 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7514 7515 /* infer new local matrix type from received local matrices type */ 7516 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7517 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7518 if (n_recvs) { 7519 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7520 ptr_idxs = recv_buffer_idxs; 7521 for (i=0;i<n_recvs;i++) { 7522 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7523 new_local_type_private = MATAIJ_PRIVATE; 7524 break; 7525 } 7526 ptr_idxs += olengths_idxs[i]; 7527 } 7528 switch (new_local_type_private) { 7529 case MATDENSE_PRIVATE: 7530 new_local_type = MATSEQAIJ; 7531 bs = 1; 7532 break; 7533 case MATAIJ_PRIVATE: 7534 new_local_type = MATSEQAIJ; 7535 bs = 1; 7536 break; 7537 case MATBAIJ_PRIVATE: 7538 new_local_type = MATSEQBAIJ; 7539 break; 7540 case MATSBAIJ_PRIVATE: 7541 new_local_type = MATSEQSBAIJ; 7542 break; 7543 default: 7544 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7545 break; 7546 } 7547 } else { /* by default, new_local_type is seqaij */ 7548 new_local_type = MATSEQAIJ; 7549 bs = 1; 7550 } 7551 7552 /* create MATIS object if needed */ 7553 if (!reuse) { 7554 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7555 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7556 } else { 7557 /* it also destroys the local matrices */ 7558 if (*mat_n) { 7559 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7560 } else { /* this is a fake object */ 7561 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7562 } 7563 } 7564 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7565 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7566 7567 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7568 7569 /* Global to local map of received indices */ 7570 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7571 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7572 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7573 7574 /* restore attributes -> type of incoming data and its size */ 7575 buf_size_idxs = 0; 7576 for (i=0;i<n_recvs;i++) { 7577 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7578 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7579 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7580 } 7581 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7582 7583 /* set preallocation */ 7584 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7585 if (!newisdense) { 7586 PetscInt *new_local_nnz=0; 7587 7588 ptr_idxs = recv_buffer_idxs_local; 7589 if (n_recvs) { 7590 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7591 } 7592 for (i=0;i<n_recvs;i++) { 7593 PetscInt j; 7594 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7595 for (j=0;j<*(ptr_idxs+1);j++) { 7596 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7597 } 7598 } else { 7599 /* TODO */ 7600 } 7601 ptr_idxs += olengths_idxs[i]; 7602 } 7603 if (new_local_nnz) { 7604 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7605 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7606 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7607 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7608 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7609 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7610 } else { 7611 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7612 } 7613 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7614 } else { 7615 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7616 } 7617 7618 /* set values */ 7619 ptr_vals = recv_buffer_vals; 7620 ptr_idxs = recv_buffer_idxs_local; 7621 for (i=0;i<n_recvs;i++) { 7622 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7623 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7624 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7625 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7626 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7627 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7628 } else { 7629 /* TODO */ 7630 } 7631 ptr_idxs += olengths_idxs[i]; 7632 ptr_vals += olengths_vals[i]; 7633 } 7634 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7635 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7636 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7637 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7638 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7639 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7640 7641 #if 0 7642 if (!restrict_comm) { /* check */ 7643 Vec lvec,rvec; 7644 PetscReal infty_error; 7645 7646 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7647 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7648 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7649 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7650 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7651 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7652 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7653 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7654 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7655 } 7656 #endif 7657 7658 /* assemble new additional is (if any) */ 7659 if (nis) { 7660 PetscInt **temp_idxs,*count_is,j,psum; 7661 7662 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7663 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7664 ptr_idxs = recv_buffer_idxs_is; 7665 psum = 0; 7666 for (i=0;i<n_recvs;i++) { 7667 for (j=0;j<nis;j++) { 7668 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7669 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7670 psum += plen; 7671 ptr_idxs += plen+1; /* shift pointer to received data */ 7672 } 7673 } 7674 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7675 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7676 for (i=1;i<nis;i++) { 7677 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7678 } 7679 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7680 ptr_idxs = recv_buffer_idxs_is; 7681 for (i=0;i<n_recvs;i++) { 7682 for (j=0;j<nis;j++) { 7683 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7684 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7685 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7686 ptr_idxs += plen+1; /* shift pointer to received data */ 7687 } 7688 } 7689 for (i=0;i<nis;i++) { 7690 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7691 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7692 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7693 } 7694 ierr = PetscFree(count_is);CHKERRQ(ierr); 7695 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7696 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7697 } 7698 /* free workspace */ 7699 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7700 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7701 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7702 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7703 if (isdense) { 7704 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7705 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7706 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7707 } else { 7708 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7709 } 7710 if (nis) { 7711 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7712 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7713 } 7714 7715 if (nvecs) { 7716 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7717 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7718 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7719 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7720 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7721 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7722 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7723 /* set values */ 7724 ptr_vals = recv_buffer_vecs; 7725 ptr_idxs = recv_buffer_idxs_local; 7726 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7727 for (i=0;i<n_recvs;i++) { 7728 PetscInt j; 7729 for (j=0;j<*(ptr_idxs+1);j++) { 7730 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7731 } 7732 ptr_idxs += olengths_idxs[i]; 7733 ptr_vals += olengths_idxs[i]-2; 7734 } 7735 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7736 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7737 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7738 } 7739 7740 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7741 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7742 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7743 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7744 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7745 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7746 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7747 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7748 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7749 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7750 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7751 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7752 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7753 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7754 ierr = PetscFree(onodes);CHKERRQ(ierr); 7755 if (nis) { 7756 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7757 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7758 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7759 } 7760 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7761 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7762 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7763 for (i=0;i<nis;i++) { 7764 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7765 } 7766 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7767 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7768 } 7769 *mat_n = NULL; 7770 } 7771 PetscFunctionReturn(0); 7772 } 7773 7774 /* temporary hack into ksp private data structure */ 7775 #include <petsc/private/kspimpl.h> 7776 7777 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7778 { 7779 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7780 PC_IS *pcis = (PC_IS*)pc->data; 7781 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7782 Mat coarsedivudotp = NULL; 7783 Mat coarseG,t_coarse_mat_is; 7784 MatNullSpace CoarseNullSpace = NULL; 7785 ISLocalToGlobalMapping coarse_islg; 7786 IS coarse_is,*isarray; 7787 PetscInt i,im_active=-1,active_procs=-1; 7788 PetscInt nis,nisdofs,nisneu,nisvert; 7789 PetscInt coarse_eqs_per_proc; 7790 PC pc_temp; 7791 PCType coarse_pc_type; 7792 KSPType coarse_ksp_type; 7793 PetscBool multilevel_requested,multilevel_allowed; 7794 PetscBool coarse_reuse; 7795 PetscInt ncoarse,nedcfield; 7796 PetscBool compute_vecs = PETSC_FALSE; 7797 PetscScalar *array; 7798 MatReuse coarse_mat_reuse; 7799 PetscBool restr, full_restr, have_void; 7800 PetscMPIInt size; 7801 PetscErrorCode ierr; 7802 7803 PetscFunctionBegin; 7804 ierr = PetscLogEventBegin(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 7805 /* Assign global numbering to coarse dofs */ 7806 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7807 PetscInt ocoarse_size; 7808 compute_vecs = PETSC_TRUE; 7809 7810 pcbddc->new_primal_space = PETSC_TRUE; 7811 ocoarse_size = pcbddc->coarse_size; 7812 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7813 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7814 /* see if we can avoid some work */ 7815 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7816 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7817 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7818 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7819 coarse_reuse = PETSC_FALSE; 7820 } else { /* we can safely reuse already computed coarse matrix */ 7821 coarse_reuse = PETSC_TRUE; 7822 } 7823 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7824 coarse_reuse = PETSC_FALSE; 7825 } 7826 /* reset any subassembling information */ 7827 if (!coarse_reuse || pcbddc->recompute_topography) { 7828 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7829 } 7830 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7831 coarse_reuse = PETSC_TRUE; 7832 } 7833 /* assemble coarse matrix */ 7834 if (coarse_reuse && pcbddc->coarse_ksp) { 7835 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7836 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7837 coarse_mat_reuse = MAT_REUSE_MATRIX; 7838 } else { 7839 coarse_mat = NULL; 7840 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7841 } 7842 7843 /* creates temporary l2gmap and IS for coarse indexes */ 7844 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7845 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7846 7847 /* creates temporary MATIS object for coarse matrix */ 7848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7849 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7850 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7851 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7852 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7853 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7854 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7855 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7856 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7857 7858 /* count "active" (i.e. with positive local size) and "void" processes */ 7859 im_active = !!(pcis->n); 7860 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7861 7862 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7863 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7864 /* full_restr : just use the receivers from the subassembling pattern */ 7865 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&size);CHKERRQ(ierr); 7866 coarse_mat_is = NULL; 7867 multilevel_allowed = PETSC_FALSE; 7868 multilevel_requested = PETSC_FALSE; 7869 coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7870 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7871 if (pcbddc->coarse_size <= pcbddc->coarse_eqs_limit) multilevel_requested = PETSC_FALSE; 7872 if (multilevel_requested) { 7873 ncoarse = active_procs/pcbddc->coarsening_ratio; 7874 restr = PETSC_FALSE; 7875 full_restr = PETSC_FALSE; 7876 } else { 7877 ncoarse = pcbddc->coarse_size/coarse_eqs_per_proc + !!(pcbddc->coarse_size%coarse_eqs_per_proc); 7878 restr = PETSC_TRUE; 7879 full_restr = PETSC_TRUE; 7880 } 7881 if (!pcbddc->coarse_size || size == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7882 ncoarse = PetscMax(1,ncoarse); 7883 if (!pcbddc->coarse_subassembling) { 7884 if (pcbddc->coarsening_ratio > 1) { 7885 if (multilevel_requested) { 7886 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7887 } else { 7888 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7889 } 7890 } else { 7891 PetscMPIInt rank; 7892 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7893 have_void = (active_procs == (PetscInt)size) ? PETSC_FALSE : PETSC_TRUE; 7894 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7895 } 7896 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7897 PetscInt psum; 7898 if (pcbddc->coarse_ksp) psum = 1; 7899 else psum = 0; 7900 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7901 if (ncoarse < size) have_void = PETSC_TRUE; 7902 } 7903 /* determine if we can go multilevel */ 7904 if (multilevel_requested) { 7905 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7906 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7907 } 7908 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7909 7910 /* dump subassembling pattern */ 7911 if (pcbddc->dbg_flag && multilevel_allowed) { 7912 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7913 } 7914 7915 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7916 nedcfield = -1; 7917 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7918 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7919 const PetscInt *idxs; 7920 ISLocalToGlobalMapping tmap; 7921 7922 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7923 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7924 /* allocate space for temporary storage */ 7925 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7926 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7927 /* allocate for IS array */ 7928 nisdofs = pcbddc->n_ISForDofsLocal; 7929 if (pcbddc->nedclocal) { 7930 if (pcbddc->nedfield > -1) { 7931 nedcfield = pcbddc->nedfield; 7932 } else { 7933 nedcfield = 0; 7934 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%D)",nisdofs); 7935 nisdofs = 1; 7936 } 7937 } 7938 nisneu = !!pcbddc->NeumannBoundariesLocal; 7939 nisvert = 0; /* nisvert is not used */ 7940 nis = nisdofs + nisneu + nisvert; 7941 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7942 /* dofs splitting */ 7943 for (i=0;i<nisdofs;i++) { 7944 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7945 if (nedcfield != i) { 7946 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7947 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7948 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7949 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7950 } else { 7951 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7952 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7953 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7954 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %D != %D",tsize,nout); 7955 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7956 } 7957 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7958 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7959 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7960 } 7961 /* neumann boundaries */ 7962 if (pcbddc->NeumannBoundariesLocal) { 7963 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7964 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7965 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7966 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7967 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7968 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7969 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7970 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7971 } 7972 /* free memory */ 7973 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7974 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7975 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7976 } else { 7977 nis = 0; 7978 nisdofs = 0; 7979 nisneu = 0; 7980 nisvert = 0; 7981 isarray = NULL; 7982 } 7983 /* destroy no longer needed map */ 7984 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7985 7986 /* subassemble */ 7987 if (multilevel_allowed) { 7988 Vec vp[1]; 7989 PetscInt nvecs = 0; 7990 PetscBool reuse,reuser; 7991 7992 if (coarse_mat) reuse = PETSC_TRUE; 7993 else reuse = PETSC_FALSE; 7994 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7995 vp[0] = NULL; 7996 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7997 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7998 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7999 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8000 nvecs = 1; 8001 8002 if (pcbddc->divudotp) { 8003 Mat B,loc_divudotp; 8004 Vec v,p; 8005 IS dummy; 8006 PetscInt np; 8007 8008 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8009 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8010 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8011 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8012 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8013 ierr = VecSet(p,1.);CHKERRQ(ierr); 8014 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8015 ierr = VecDestroy(&p);CHKERRQ(ierr); 8016 ierr = MatDestroy(&B);CHKERRQ(ierr); 8017 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8018 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8019 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8020 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8021 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8022 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8023 ierr = VecDestroy(&v);CHKERRQ(ierr); 8024 } 8025 } 8026 if (reuser) { 8027 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8028 } else { 8029 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8030 } 8031 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8032 PetscScalar *arraym,*arrayv; 8033 PetscInt nl; 8034 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8035 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8036 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8037 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8038 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8039 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8040 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8041 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8042 } else { 8043 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8044 } 8045 } else { 8046 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8047 } 8048 if (coarse_mat_is || coarse_mat) { 8049 PetscMPIInt size; 8050 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8051 if (!multilevel_allowed) { 8052 ierr = MatConvert(coarse_mat_is,MATAIJ,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8053 } else { 8054 Mat A; 8055 8056 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8057 if (coarse_mat_is) { 8058 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8059 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8060 coarse_mat = coarse_mat_is; 8061 } 8062 /* be sure we don't have MatSeqDENSE as local mat */ 8063 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8064 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8065 } 8066 } 8067 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8068 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8069 8070 /* create local to global scatters for coarse problem */ 8071 if (compute_vecs) { 8072 PetscInt lrows; 8073 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8074 if (coarse_mat) { 8075 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8076 } else { 8077 lrows = 0; 8078 } 8079 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8080 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8081 ierr = VecSetType(pcbddc->coarse_vec,coarse_mat ? coarse_mat->defaultvectype : VECSTANDARD);CHKERRQ(ierr); 8082 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8083 ierr = VecScatterCreateWithData(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8084 } 8085 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8086 8087 /* set defaults for coarse KSP and PC */ 8088 if (multilevel_allowed) { 8089 coarse_ksp_type = KSPRICHARDSON; 8090 coarse_pc_type = PCBDDC; 8091 } else { 8092 coarse_ksp_type = KSPPREONLY; 8093 coarse_pc_type = PCREDUNDANT; 8094 } 8095 8096 /* print some info if requested */ 8097 if (pcbddc->dbg_flag) { 8098 if (!multilevel_allowed) { 8099 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8100 if (multilevel_requested) { 8101 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %D (active processes %D, coarsening ratio %D)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8102 } else if (pcbddc->max_levels) { 8103 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%D)\n",pcbddc->max_levels);CHKERRQ(ierr); 8104 } 8105 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8106 } 8107 } 8108 8109 /* communicate coarse discrete gradient */ 8110 coarseG = NULL; 8111 if (pcbddc->nedcG && multilevel_allowed) { 8112 MPI_Comm ccomm; 8113 if (coarse_mat) { 8114 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8115 } else { 8116 ccomm = MPI_COMM_NULL; 8117 } 8118 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8119 } 8120 8121 /* create the coarse KSP object only once with defaults */ 8122 if (coarse_mat) { 8123 PetscBool isredundant,isnn,isbddc; 8124 PetscViewer dbg_viewer = NULL; 8125 8126 if (pcbddc->dbg_flag) { 8127 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8128 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8129 } 8130 if (!pcbddc->coarse_ksp) { 8131 char prefix[256],str_level[16]; 8132 size_t len; 8133 8134 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8135 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8136 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8137 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8138 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8139 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8140 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8141 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8142 /* TODO is this logic correct? should check for coarse_mat type */ 8143 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8144 /* prefix */ 8145 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8146 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8147 if (!pcbddc->current_level) { 8148 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,sizeof(prefix));CHKERRQ(ierr); 8149 ierr = PetscStrlcat(prefix,"pc_bddc_coarse_",sizeof(prefix));CHKERRQ(ierr); 8150 } else { 8151 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8152 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8153 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8154 /* Nonstandard use of PetscStrncpy() to copy only a portion of the string */ 8155 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8156 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8157 ierr = PetscStrlcat(prefix,str_level,sizeof(prefix));CHKERRQ(ierr); 8158 } 8159 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8160 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8161 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8162 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8163 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8164 /* allow user customization */ 8165 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8166 /* get some info after set from options */ 8167 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8168 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8169 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8170 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8171 if (multilevel_allowed && !isbddc && !isnn) { 8172 isbddc = PETSC_TRUE; 8173 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8174 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8175 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8176 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8177 } 8178 } 8179 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8180 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8181 if (nisdofs) { 8182 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8183 for (i=0;i<nisdofs;i++) { 8184 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8185 } 8186 } 8187 if (nisneu) { 8188 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8189 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8190 } 8191 if (nisvert) { 8192 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8193 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8194 } 8195 if (coarseG) { 8196 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8197 } 8198 8199 /* get some info after set from options */ 8200 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8201 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8202 if (isbddc && !multilevel_allowed) { 8203 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8204 isbddc = PETSC_FALSE; 8205 } 8206 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8207 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8208 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8209 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8210 isbddc = PETSC_TRUE; 8211 } 8212 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8213 if (isredundant) { 8214 KSP inner_ksp; 8215 PC inner_pc; 8216 8217 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8218 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8219 } 8220 8221 /* parameters which miss an API */ 8222 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8223 if (isbddc) { 8224 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8225 8226 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8227 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8228 pcbddc_coarse->coarse_eqs_limit = pcbddc->coarse_eqs_limit; 8229 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8230 if (pcbddc_coarse->benign_saddle_point) { 8231 Mat coarsedivudotp_is; 8232 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8233 IS row,col; 8234 const PetscInt *gidxs; 8235 PetscInt n,st,M,N; 8236 8237 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8238 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8239 st = st-n; 8240 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8241 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8242 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8243 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8244 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8245 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8246 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8247 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8248 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8249 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8250 ierr = ISDestroy(&row);CHKERRQ(ierr); 8251 ierr = ISDestroy(&col);CHKERRQ(ierr); 8252 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8253 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8254 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8255 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8256 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8257 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8258 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8259 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8260 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8261 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8262 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8263 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8264 } 8265 } 8266 8267 /* propagate symmetry info of coarse matrix */ 8268 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8269 if (pc->pmat->symmetric_set) { 8270 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8271 } 8272 if (pc->pmat->hermitian_set) { 8273 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8274 } 8275 if (pc->pmat->spd_set) { 8276 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8277 } 8278 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8279 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8280 } 8281 /* set operators */ 8282 ierr = MatViewFromOptions(coarse_mat,(PetscObject)pc,"-pc_bddc_coarse_mat_view");CHKERRQ(ierr); 8283 ierr = MatSetOptionsPrefix(coarse_mat,((PetscObject)pcbddc->coarse_ksp)->prefix);CHKERRQ(ierr); 8284 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8285 if (pcbddc->dbg_flag) { 8286 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8287 } 8288 } 8289 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8290 ierr = PetscFree(isarray);CHKERRQ(ierr); 8291 #if 0 8292 { 8293 PetscViewer viewer; 8294 char filename[256]; 8295 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8296 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8297 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8298 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8299 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8300 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8301 } 8302 #endif 8303 8304 if (pcbddc->coarse_ksp) { 8305 Vec crhs,csol; 8306 8307 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8308 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8309 if (!csol) { 8310 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8311 } 8312 if (!crhs) { 8313 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8314 } 8315 } 8316 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8317 8318 /* compute null space for coarse solver if the benign trick has been requested */ 8319 if (pcbddc->benign_null) { 8320 8321 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8322 for (i=0;i<pcbddc->benign_n;i++) { 8323 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8324 } 8325 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8326 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8327 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8328 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8329 if (coarse_mat) { 8330 Vec nullv; 8331 PetscScalar *array,*array2; 8332 PetscInt nl; 8333 8334 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8335 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8336 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8337 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8338 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8339 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8340 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8341 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8342 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8343 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8344 } 8345 } 8346 ierr = PetscLogEventEnd(PC_BDDC_CoarseSetUp[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8347 8348 ierr = PetscLogEventBegin(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8349 if (pcbddc->coarse_ksp) { 8350 PetscBool ispreonly; 8351 8352 if (CoarseNullSpace) { 8353 PetscBool isnull; 8354 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8355 if (isnull) { 8356 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8357 } 8358 /* TODO: add local nullspaces (if any) */ 8359 } 8360 /* setup coarse ksp */ 8361 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8362 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8363 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8364 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8365 KSP check_ksp; 8366 KSPType check_ksp_type; 8367 PC check_pc; 8368 Vec check_vec,coarse_vec; 8369 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8370 PetscInt its; 8371 PetscBool compute_eigs; 8372 PetscReal *eigs_r,*eigs_c; 8373 PetscInt neigs; 8374 const char *prefix; 8375 8376 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8377 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8378 ierr = PetscObjectIncrementTabLevel((PetscObject)check_ksp,(PetscObject)pcbddc->coarse_ksp,0);CHKERRQ(ierr); 8379 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,PETSC_FALSE);CHKERRQ(ierr); 8380 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8381 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8382 /* prevent from setup unneeded object */ 8383 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8384 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8385 if (ispreonly) { 8386 check_ksp_type = KSPPREONLY; 8387 compute_eigs = PETSC_FALSE; 8388 } else { 8389 check_ksp_type = KSPGMRES; 8390 compute_eigs = PETSC_TRUE; 8391 } 8392 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8393 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8394 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8395 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8396 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8397 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8398 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8399 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8400 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8401 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8402 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8403 /* create random vec */ 8404 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8405 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8406 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8407 /* solve coarse problem */ 8408 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8409 /* set eigenvalue estimation if preonly has not been requested */ 8410 if (compute_eigs) { 8411 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8412 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8413 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8414 if (neigs) { 8415 lambda_max = eigs_r[neigs-1]; 8416 lambda_min = eigs_r[0]; 8417 if (pcbddc->use_coarse_estimates) { 8418 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8419 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8420 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8421 } 8422 } 8423 } 8424 } 8425 8426 /* check coarse problem residual error */ 8427 if (pcbddc->dbg_flag) { 8428 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8429 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8430 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8431 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8432 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8433 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8434 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8435 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8436 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8437 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8438 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8439 if (CoarseNullSpace) { 8440 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8441 } 8442 if (compute_eigs) { 8443 PetscReal lambda_max_s,lambda_min_s; 8444 KSPConvergedReason reason; 8445 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8446 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8447 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8448 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8449 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8450 for (i=0;i<neigs;i++) { 8451 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8452 } 8453 } 8454 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8455 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8456 } 8457 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8458 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8459 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8460 if (compute_eigs) { 8461 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8462 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8463 } 8464 } 8465 } 8466 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8467 /* print additional info */ 8468 if (pcbddc->dbg_flag) { 8469 /* waits until all processes reaches this point */ 8470 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8471 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %D\n",pcbddc->current_level);CHKERRQ(ierr); 8472 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8473 } 8474 8475 /* free memory */ 8476 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8477 ierr = PetscLogEventEnd(PC_BDDC_CoarseSolver[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8478 PetscFunctionReturn(0); 8479 } 8480 8481 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8482 { 8483 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8484 PC_IS* pcis = (PC_IS*)pc->data; 8485 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8486 IS subset,subset_mult,subset_n; 8487 PetscInt local_size,coarse_size=0; 8488 PetscInt *local_primal_indices=NULL; 8489 const PetscInt *t_local_primal_indices; 8490 PetscErrorCode ierr; 8491 8492 PetscFunctionBegin; 8493 /* Compute global number of coarse dofs */ 8494 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8495 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8496 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8497 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8498 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8499 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8500 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8501 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8502 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8503 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8504 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8505 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8506 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8507 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8508 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8509 8510 /* check numbering */ 8511 if (pcbddc->dbg_flag) { 8512 PetscScalar coarsesum,*array,*array2; 8513 PetscInt i; 8514 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8515 8516 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8517 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8518 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8519 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8520 /* counter */ 8521 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8522 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8523 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8524 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8525 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8526 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8527 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8528 for (i=0;i<pcbddc->local_primal_size;i++) { 8529 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8530 } 8531 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8532 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8533 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8534 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8535 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8536 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8537 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8538 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8539 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8540 for (i=0;i<pcis->n;i++) { 8541 if (array[i] != 0.0 && array[i] != array2[i]) { 8542 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8543 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8544 set_error = PETSC_TRUE; 8545 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %D (gid %D) owned by %D processes instead of %D!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8547 } 8548 } 8549 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8550 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8551 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8552 for (i=0;i<pcis->n;i++) { 8553 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8554 } 8555 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8556 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8557 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8558 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8559 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8560 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %D (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8561 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8562 PetscInt *gidxs; 8563 8564 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8565 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8566 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8567 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8568 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8569 for (i=0;i<pcbddc->local_primal_size;i++) { 8570 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%D]=%D (%D,%D)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8571 } 8572 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8573 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8574 } 8575 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8576 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8577 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8578 } 8579 8580 /* get back data */ 8581 *coarse_size_n = coarse_size; 8582 *local_primal_indices_n = local_primal_indices; 8583 PetscFunctionReturn(0); 8584 } 8585 8586 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8587 { 8588 IS localis_t; 8589 PetscInt i,lsize,*idxs,n; 8590 PetscScalar *vals; 8591 PetscErrorCode ierr; 8592 8593 PetscFunctionBegin; 8594 /* get indices in local ordering exploiting local to global map */ 8595 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8596 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8597 for (i=0;i<lsize;i++) vals[i] = 1.0; 8598 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8599 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8600 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8601 if (idxs) { /* multilevel guard */ 8602 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8603 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8604 } 8605 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8606 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8607 ierr = PetscFree(vals);CHKERRQ(ierr); 8608 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8609 /* now compute set in local ordering */ 8610 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8611 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8612 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8613 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8614 for (i=0,lsize=0;i<n;i++) { 8615 if (PetscRealPart(vals[i]) > 0.5) { 8616 lsize++; 8617 } 8618 } 8619 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8620 for (i=0,lsize=0;i<n;i++) { 8621 if (PetscRealPart(vals[i]) > 0.5) { 8622 idxs[lsize++] = i; 8623 } 8624 } 8625 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8626 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8627 *localis = localis_t; 8628 PetscFunctionReturn(0); 8629 } 8630 8631 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8632 { 8633 PC_IS *pcis=(PC_IS*)pc->data; 8634 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8635 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8636 Mat S_j; 8637 PetscInt *used_xadj,*used_adjncy; 8638 PetscBool free_used_adj; 8639 PetscErrorCode ierr; 8640 8641 PetscFunctionBegin; 8642 ierr = PetscLogEventBegin(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8643 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8644 free_used_adj = PETSC_FALSE; 8645 if (pcbddc->sub_schurs_layers == -1) { 8646 used_xadj = NULL; 8647 used_adjncy = NULL; 8648 } else { 8649 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8650 used_xadj = pcbddc->mat_graph->xadj; 8651 used_adjncy = pcbddc->mat_graph->adjncy; 8652 } else if (pcbddc->computed_rowadj) { 8653 used_xadj = pcbddc->mat_graph->xadj; 8654 used_adjncy = pcbddc->mat_graph->adjncy; 8655 } else { 8656 PetscBool flg_row=PETSC_FALSE; 8657 const PetscInt *xadj,*adjncy; 8658 PetscInt nvtxs; 8659 8660 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8661 if (flg_row) { 8662 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8663 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8664 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8665 free_used_adj = PETSC_TRUE; 8666 } else { 8667 pcbddc->sub_schurs_layers = -1; 8668 used_xadj = NULL; 8669 used_adjncy = NULL; 8670 } 8671 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8672 } 8673 } 8674 8675 /* setup sub_schurs data */ 8676 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8677 if (!sub_schurs->schur_explicit) { 8678 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8679 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8680 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8681 } else { 8682 Mat change = NULL; 8683 Vec scaling = NULL; 8684 IS change_primal = NULL, iP; 8685 PetscInt benign_n; 8686 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8687 PetscBool isseqaij,need_change = PETSC_FALSE; 8688 PetscBool discrete_harmonic = PETSC_FALSE; 8689 8690 if (!pcbddc->use_vertices && reuse_solvers) { 8691 PetscInt n_vertices; 8692 8693 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8694 reuse_solvers = (PetscBool)!n_vertices; 8695 } 8696 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8697 if (!isseqaij) { 8698 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8699 if (matis->A == pcbddc->local_mat) { 8700 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8701 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8702 } else { 8703 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8704 } 8705 } 8706 if (!pcbddc->benign_change_explicit) { 8707 benign_n = pcbddc->benign_n; 8708 } else { 8709 benign_n = 0; 8710 } 8711 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8712 We need a global reduction to avoid possible deadlocks. 8713 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8714 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8715 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8716 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8717 need_change = (PetscBool)(!need_change); 8718 } 8719 /* If the user defines additional constraints, we import them here. 8720 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8721 if (need_change) { 8722 PC_IS *pcisf; 8723 PC_BDDC *pcbddcf; 8724 PC pcf; 8725 8726 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8727 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8728 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8729 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8730 8731 /* hacks */ 8732 pcisf = (PC_IS*)pcf->data; 8733 pcisf->is_B_local = pcis->is_B_local; 8734 pcisf->vec1_N = pcis->vec1_N; 8735 pcisf->BtoNmap = pcis->BtoNmap; 8736 pcisf->n = pcis->n; 8737 pcisf->n_B = pcis->n_B; 8738 pcbddcf = (PC_BDDC*)pcf->data; 8739 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8740 pcbddcf->mat_graph = pcbddc->mat_graph; 8741 pcbddcf->use_faces = PETSC_TRUE; 8742 pcbddcf->use_change_of_basis = PETSC_TRUE; 8743 pcbddcf->use_change_on_faces = PETSC_TRUE; 8744 pcbddcf->use_qr_single = PETSC_TRUE; 8745 pcbddcf->fake_change = PETSC_TRUE; 8746 8747 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8748 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8749 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8750 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8751 change = pcbddcf->ConstraintMatrix; 8752 pcbddcf->ConstraintMatrix = NULL; 8753 8754 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8755 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8756 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8757 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8758 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8759 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8760 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8761 pcf->ops->destroy = NULL; 8762 pcf->ops->reset = NULL; 8763 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8764 } 8765 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8766 8767 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8768 if (iP) { 8769 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8770 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8771 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8772 } 8773 if (discrete_harmonic) { 8774 Mat A; 8775 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8776 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8777 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8778 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8779 ierr = MatDestroy(&A);CHKERRQ(ierr); 8780 } else { 8781 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8782 } 8783 ierr = MatDestroy(&change);CHKERRQ(ierr); 8784 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8785 } 8786 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8787 8788 /* free adjacency */ 8789 if (free_used_adj) { 8790 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8791 } 8792 ierr = PetscLogEventEnd(PC_BDDC_Schurs[pcbddc->current_level],pc,0,0,0);CHKERRQ(ierr); 8793 PetscFunctionReturn(0); 8794 } 8795 8796 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8797 { 8798 PC_IS *pcis=(PC_IS*)pc->data; 8799 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8800 PCBDDCGraph graph; 8801 PetscErrorCode ierr; 8802 8803 PetscFunctionBegin; 8804 /* attach interface graph for determining subsets */ 8805 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8806 IS verticesIS,verticescomm; 8807 PetscInt vsize,*idxs; 8808 8809 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8810 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8811 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8812 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8813 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8814 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8815 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8816 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8817 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8818 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8819 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8820 } else { 8821 graph = pcbddc->mat_graph; 8822 } 8823 /* print some info */ 8824 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8825 IS vertices; 8826 PetscInt nv,nedges,nfaces; 8827 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8828 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8829 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8830 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8831 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8832 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%D)\n",PetscGlobalRank,(int)nv,pcbddc->use_vertices);CHKERRQ(ierr); 8833 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%D)\n",PetscGlobalRank,(int)nedges,pcbddc->use_edges);CHKERRQ(ierr); 8834 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%D)\n",PetscGlobalRank,(int)nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8835 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8836 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8837 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8838 } 8839 8840 /* sub_schurs init */ 8841 if (!pcbddc->sub_schurs) { 8842 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8843 } 8844 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8845 8846 /* free graph struct */ 8847 if (pcbddc->sub_schurs_rebuild) { 8848 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8849 } 8850 PetscFunctionReturn(0); 8851 } 8852 8853 PetscErrorCode PCBDDCCheckOperator(PC pc) 8854 { 8855 PC_IS *pcis=(PC_IS*)pc->data; 8856 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8857 PetscErrorCode ierr; 8858 8859 PetscFunctionBegin; 8860 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8861 IS zerodiag = NULL; 8862 Mat S_j,B0_B=NULL; 8863 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8864 PetscScalar *p0_check,*array,*array2; 8865 PetscReal norm; 8866 PetscInt i; 8867 8868 /* B0 and B0_B */ 8869 if (zerodiag) { 8870 IS dummy; 8871 8872 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8873 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8874 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8875 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8876 } 8877 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8878 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8879 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8880 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8881 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8882 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8883 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8884 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8885 /* S_j */ 8886 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8887 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8888 8889 /* mimic vector in \widetilde{W}_\Gamma */ 8890 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8891 /* continuous in primal space */ 8892 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8893 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8894 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8895 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8896 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8897 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8898 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8899 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8900 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8901 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8902 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8903 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8904 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8905 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8906 8907 /* assemble rhs for coarse problem */ 8908 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8909 /* local with Schur */ 8910 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8911 if (zerodiag) { 8912 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8913 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8914 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8915 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8916 } 8917 /* sum on primal nodes the local contributions */ 8918 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8919 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8920 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8921 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8922 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8923 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8924 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8925 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8926 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8927 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8928 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8929 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8930 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8931 /* scale primal nodes (BDDC sums contibutions) */ 8932 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8933 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8934 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8935 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8936 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8937 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8938 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8939 /* global: \widetilde{B0}_B w_\Gamma */ 8940 if (zerodiag) { 8941 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8942 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8943 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8944 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8945 } 8946 /* BDDC */ 8947 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8948 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8949 8950 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8951 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8952 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8953 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm);CHKERRQ(ierr); 8954 for (i=0;i<pcbddc->benign_n;i++) { 8955 ierr = PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%D] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i]));CHKERRQ(ierr); 8956 } 8957 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8958 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8959 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8960 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8961 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8962 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8963 } 8964 PetscFunctionReturn(0); 8965 } 8966 8967 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8968 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8969 { 8970 Mat At; 8971 IS rows; 8972 PetscInt rst,ren; 8973 PetscErrorCode ierr; 8974 PetscLayout rmap; 8975 8976 PetscFunctionBegin; 8977 rst = ren = 0; 8978 if (ccomm != MPI_COMM_NULL) { 8979 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8980 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8981 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8982 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8983 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8984 } 8985 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8986 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8987 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8988 8989 if (ccomm != MPI_COMM_NULL) { 8990 Mat_MPIAIJ *a,*b; 8991 IS from,to; 8992 Vec gvec; 8993 PetscInt lsize; 8994 8995 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8996 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8997 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8998 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8999 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9000 a = (Mat_MPIAIJ*)At->data; 9001 b = (Mat_MPIAIJ*)(*B)->data; 9002 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9003 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9004 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9005 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9006 b->A = a->A; 9007 b->B = a->B; 9008 9009 b->donotstash = a->donotstash; 9010 b->roworiented = a->roworiented; 9011 b->rowindices = 0; 9012 b->rowvalues = 0; 9013 b->getrowactive = PETSC_FALSE; 9014 9015 (*B)->rmap = rmap; 9016 (*B)->factortype = A->factortype; 9017 (*B)->assembled = PETSC_TRUE; 9018 (*B)->insertmode = NOT_SET_VALUES; 9019 (*B)->preallocated = PETSC_TRUE; 9020 9021 if (a->colmap) { 9022 #if defined(PETSC_USE_CTABLE) 9023 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9024 #else 9025 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9026 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9027 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9028 #endif 9029 } else b->colmap = 0; 9030 if (a->garray) { 9031 PetscInt len; 9032 len = a->B->cmap->n; 9033 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9034 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9035 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9036 } else b->garray = 0; 9037 9038 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9039 b->lvec = a->lvec; 9040 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9041 9042 /* cannot use VecScatterCopy */ 9043 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9044 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9045 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9046 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9047 ierr = VecScatterCreateWithData(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9048 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9049 ierr = ISDestroy(&from);CHKERRQ(ierr); 9050 ierr = ISDestroy(&to);CHKERRQ(ierr); 9051 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9052 } 9053 ierr = MatDestroy(&At);CHKERRQ(ierr); 9054 PetscFunctionReturn(0); 9055 } 9056