1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 if (!maxneighs) { 1528 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1529 *nnsp = NULL; 1530 PetscFunctionReturn(0); 1531 } 1532 maxsize = 0; 1533 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1534 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1535 /* create vectors to hold quadrature weights */ 1536 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1537 if (!transpose) { 1538 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1539 } else { 1540 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1541 } 1542 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1543 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1544 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1545 for (i=0;i<maxneighs;i++) { 1546 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1547 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1548 } 1549 1550 /* compute local quad vec */ 1551 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1556 } 1557 ierr = VecSet(p,1.);CHKERRQ(ierr); 1558 if (!transpose) { 1559 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1560 } else { 1561 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1562 } 1563 if (vl2l) { 1564 Mat lA; 1565 VecScatter sc; 1566 1567 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1568 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1569 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1570 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1571 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1572 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1573 } else { 1574 vins = v; 1575 } 1576 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1577 ierr = VecDestroy(&p);CHKERRQ(ierr); 1578 1579 /* insert in global quadrature vecs */ 1580 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1581 for (i=0;i<n_neigh;i++) { 1582 const PetscInt *idxs; 1583 PetscInt idx,nn,j; 1584 1585 idxs = shared[i]; 1586 nn = n_shared[i]; 1587 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1588 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1589 idx = -(idx+1); 1590 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1591 } 1592 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1593 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1594 if (vl2l) { 1595 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1596 } 1597 ierr = VecDestroy(&v);CHKERRQ(ierr); 1598 ierr = PetscFree(vals);CHKERRQ(ierr); 1599 1600 /* assemble near null space */ 1601 for (i=0;i<maxneighs;i++) { 1602 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1603 } 1604 for (i=0;i<maxneighs;i++) { 1605 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1606 ierr = VecViewFromOptions(quad_vecs[i],NULL,"-pc_bddc_quad_vecs_view");CHKERRQ(ierr); 1607 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1608 } 1609 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1610 PetscFunctionReturn(0); 1611 } 1612 1613 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1614 { 1615 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1616 PetscErrorCode ierr; 1617 1618 PetscFunctionBegin; 1619 if (primalv) { 1620 if (pcbddc->user_primal_vertices_local) { 1621 IS list[2], newp; 1622 1623 list[0] = primalv; 1624 list[1] = pcbddc->user_primal_vertices_local; 1625 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1626 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1627 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1628 pcbddc->user_primal_vertices_local = newp; 1629 } else { 1630 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1631 } 1632 } 1633 PetscFunctionReturn(0); 1634 } 1635 1636 static PetscErrorCode func_coords_private(PetscInt dim, PetscReal t, const PetscReal X[], PetscInt Nf, PetscScalar *out, void *ctx) 1637 { 1638 PetscInt f, *comp = (PetscInt *)ctx; 1639 1640 PetscFunctionBegin; 1641 for (f=0;f<Nf;f++) out[f] = X[*comp]; 1642 PetscFunctionReturn(0); 1643 } 1644 1645 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1646 { 1647 PetscErrorCode ierr; 1648 Vec local,global; 1649 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1650 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1651 PetscBool monolithic = PETSC_FALSE; 1652 1653 PetscFunctionBegin; 1654 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1655 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1656 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1657 /* need to convert from global to local topology information and remove references to information in global ordering */ 1658 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1659 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1660 if (monolithic) { /* just get block size to properly compute vertices */ 1661 if (pcbddc->vertex_size == 1) { 1662 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1663 } 1664 goto boundary; 1665 } 1666 1667 if (pcbddc->user_provided_isfordofs) { 1668 if (pcbddc->n_ISForDofs) { 1669 PetscInt i; 1670 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1671 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1672 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1673 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1674 } 1675 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1676 pcbddc->n_ISForDofs = 0; 1677 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1678 } 1679 } else { 1680 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1681 DM dm; 1682 1683 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1684 if (!dm) { 1685 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1686 } 1687 if (dm) { 1688 IS *fields; 1689 PetscInt nf,i; 1690 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1691 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1692 for (i=0;i<nf;i++) { 1693 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1694 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1695 } 1696 ierr = PetscFree(fields);CHKERRQ(ierr); 1697 pcbddc->n_ISForDofsLocal = nf; 1698 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1699 PetscContainer c; 1700 1701 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1702 if (c) { 1703 MatISLocalFields lf; 1704 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1705 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1706 } else { /* fallback, create the default fields if bs > 1 */ 1707 PetscInt i, n = matis->A->rmap->n; 1708 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1709 if (i > 1) { 1710 pcbddc->n_ISForDofsLocal = i; 1711 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1712 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1713 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1714 } 1715 } 1716 } 1717 } 1718 } else { 1719 PetscInt i; 1720 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1721 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1722 } 1723 } 1724 } 1725 1726 boundary: 1727 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1728 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1729 } else if (pcbddc->DirichletBoundariesLocal) { 1730 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1731 } 1732 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1733 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1734 } else if (pcbddc->NeumannBoundariesLocal) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1736 } 1737 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1738 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1739 } 1740 ierr = VecDestroy(&global);CHKERRQ(ierr); 1741 ierr = VecDestroy(&local);CHKERRQ(ierr); 1742 /* detect local disconnected subdomains if requested (use matis->A) */ 1743 if (pcbddc->detect_disconnected) { 1744 IS primalv = NULL; 1745 PetscInt i; 1746 1747 for (i=0;i<pcbddc->n_local_subs;i++) { 1748 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1749 } 1750 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1751 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1752 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1753 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1754 } 1755 /* early stage corner detection */ 1756 { 1757 DM dm; 1758 1759 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1760 if (dm) { 1761 PetscBool isda; 1762 1763 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1764 if (isda) { 1765 ISLocalToGlobalMapping l2l; 1766 IS corners; 1767 Mat lA; 1768 1769 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1770 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1771 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1772 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1773 if (l2l) { 1774 const PetscInt *idx; 1775 PetscInt bs,*idxout,n; 1776 1777 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1778 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1779 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1780 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1781 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1782 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1783 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1785 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1786 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1787 pcbddc->corner_selected = PETSC_TRUE; 1788 } else { /* not from DMDA */ 1789 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1790 } 1791 } 1792 } 1793 } 1794 if (pcbddc->corner_selection && !pcbddc->mat_graph->cdim) { 1795 DM dm; 1796 1797 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 1798 if (!dm) { 1799 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1800 } 1801 if (dm) { 1802 Vec vcoords; 1803 PetscSection section; 1804 PetscReal *coords; 1805 PetscInt d,cdim,nl,nf,**ctxs; 1806 PetscErrorCode (**funcs)(PetscInt, PetscReal, const PetscReal *, PetscInt, PetscScalar *, void *); 1807 1808 ierr = DMGetCoordinateDim(dm,&cdim);CHKERRQ(ierr); 1809 ierr = DMGetDefaultSection(dm,§ion);CHKERRQ(ierr); 1810 ierr = PetscSectionGetNumFields(section,&nf);CHKERRQ(ierr); 1811 ierr = DMCreateGlobalVector(dm,&vcoords);CHKERRQ(ierr); 1812 ierr = VecGetLocalSize(vcoords,&nl);CHKERRQ(ierr); 1813 ierr = PetscMalloc1(nl*cdim,&coords);CHKERRQ(ierr); 1814 ierr = PetscMalloc2(nf,&funcs,nf,&ctxs);CHKERRQ(ierr); 1815 ierr = PetscMalloc1(nf,&ctxs[0]);CHKERRQ(ierr); 1816 for (d=0;d<nf;d++) funcs[d] = func_coords_private; 1817 for (d=1;d<nf;d++) ctxs[d] = ctxs[d-1] + 1; 1818 for (d=0;d<cdim;d++) { 1819 PetscInt i; 1820 const PetscScalar *v; 1821 1822 for (i=0;i<nf;i++) ctxs[i][0] = d; 1823 ierr = DMProjectFunction(dm,0.0,funcs,(void**)ctxs,INSERT_VALUES,vcoords);CHKERRQ(ierr); 1824 ierr = VecGetArrayRead(vcoords,&v);CHKERRQ(ierr); 1825 for (i=0;i<nl;i++) coords[i*cdim+d] = PetscRealPart(v[i]); 1826 ierr = VecRestoreArrayRead(vcoords,&v);CHKERRQ(ierr); 1827 } 1828 ierr = VecDestroy(&vcoords);CHKERRQ(ierr); 1829 ierr = PCSetCoordinates(pc,cdim,nl,coords);CHKERRQ(ierr); 1830 ierr = PetscFree(coords);CHKERRQ(ierr); 1831 ierr = PetscFree(ctxs[0]);CHKERRQ(ierr); 1832 ierr = PetscFree2(funcs,ctxs);CHKERRQ(ierr); 1833 } 1834 } 1835 PetscFunctionReturn(0); 1836 } 1837 1838 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1839 { 1840 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1841 PetscErrorCode ierr; 1842 IS nis; 1843 const PetscInt *idxs; 1844 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1845 PetscBool *ld; 1846 1847 PetscFunctionBegin; 1848 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1849 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1850 if (mop == MPI_LAND) { 1851 /* init rootdata with true */ 1852 ld = (PetscBool*) matis->sf_rootdata; 1853 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1854 } else { 1855 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1856 } 1857 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1858 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1859 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1860 ld = (PetscBool*) matis->sf_leafdata; 1861 for (i=0;i<nd;i++) 1862 if (-1 < idxs[i] && idxs[i] < n) 1863 ld[idxs[i]] = PETSC_TRUE; 1864 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1865 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1866 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1867 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1868 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1869 if (mop == MPI_LAND) { 1870 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1871 } else { 1872 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1873 } 1874 for (i=0,nnd=0;i<n;i++) 1875 if (ld[i]) 1876 nidxs[nnd++] = i; 1877 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1878 ierr = ISDestroy(is);CHKERRQ(ierr); 1879 *is = nis; 1880 PetscFunctionReturn(0); 1881 } 1882 1883 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1884 { 1885 PC_IS *pcis = (PC_IS*)(pc->data); 1886 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1887 PetscErrorCode ierr; 1888 1889 PetscFunctionBegin; 1890 if (!pcbddc->benign_have_null) { 1891 PetscFunctionReturn(0); 1892 } 1893 if (pcbddc->ChangeOfBasisMatrix) { 1894 Vec swap; 1895 1896 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1897 swap = pcbddc->work_change; 1898 pcbddc->work_change = r; 1899 r = swap; 1900 } 1901 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1902 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1903 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1904 ierr = VecSet(z,0.);CHKERRQ(ierr); 1905 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1906 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1907 if (pcbddc->ChangeOfBasisMatrix) { 1908 pcbddc->work_change = r; 1909 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1910 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1911 } 1912 PetscFunctionReturn(0); 1913 } 1914 1915 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1916 { 1917 PCBDDCBenignMatMult_ctx ctx; 1918 PetscErrorCode ierr; 1919 PetscBool apply_right,apply_left,reset_x; 1920 1921 PetscFunctionBegin; 1922 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1923 if (transpose) { 1924 apply_right = ctx->apply_left; 1925 apply_left = ctx->apply_right; 1926 } else { 1927 apply_right = ctx->apply_right; 1928 apply_left = ctx->apply_left; 1929 } 1930 reset_x = PETSC_FALSE; 1931 if (apply_right) { 1932 const PetscScalar *ax; 1933 PetscInt nl,i; 1934 1935 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1936 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1937 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1938 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1939 for (i=0;i<ctx->benign_n;i++) { 1940 PetscScalar sum,val; 1941 const PetscInt *idxs; 1942 PetscInt nz,j; 1943 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1944 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1945 sum = 0.; 1946 if (ctx->apply_p0) { 1947 val = ctx->work[idxs[nz-1]]; 1948 for (j=0;j<nz-1;j++) { 1949 sum += ctx->work[idxs[j]]; 1950 ctx->work[idxs[j]] += val; 1951 } 1952 } else { 1953 for (j=0;j<nz-1;j++) { 1954 sum += ctx->work[idxs[j]]; 1955 } 1956 } 1957 ctx->work[idxs[nz-1]] -= sum; 1958 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1959 } 1960 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1961 reset_x = PETSC_TRUE; 1962 } 1963 if (transpose) { 1964 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1965 } else { 1966 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1967 } 1968 if (reset_x) { 1969 ierr = VecResetArray(x);CHKERRQ(ierr); 1970 } 1971 if (apply_left) { 1972 PetscScalar *ay; 1973 PetscInt i; 1974 1975 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1976 for (i=0;i<ctx->benign_n;i++) { 1977 PetscScalar sum,val; 1978 const PetscInt *idxs; 1979 PetscInt nz,j; 1980 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1981 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1982 val = -ay[idxs[nz-1]]; 1983 if (ctx->apply_p0) { 1984 sum = 0.; 1985 for (j=0;j<nz-1;j++) { 1986 sum += ay[idxs[j]]; 1987 ay[idxs[j]] += val; 1988 } 1989 ay[idxs[nz-1]] += sum; 1990 } else { 1991 for (j=0;j<nz-1;j++) { 1992 ay[idxs[j]] += val; 1993 } 1994 ay[idxs[nz-1]] = 0.; 1995 } 1996 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1997 } 1998 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1999 } 2000 PetscFunctionReturn(0); 2001 } 2002 2003 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 2004 { 2005 PetscErrorCode ierr; 2006 2007 PetscFunctionBegin; 2008 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 2009 PetscFunctionReturn(0); 2010 } 2011 2012 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 2013 { 2014 PetscErrorCode ierr; 2015 2016 PetscFunctionBegin; 2017 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 2018 PetscFunctionReturn(0); 2019 } 2020 2021 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 2022 { 2023 PC_IS *pcis = (PC_IS*)pc->data; 2024 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2025 PCBDDCBenignMatMult_ctx ctx; 2026 PetscErrorCode ierr; 2027 2028 PetscFunctionBegin; 2029 if (!restore) { 2030 Mat A_IB,A_BI; 2031 PetscScalar *work; 2032 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2033 2034 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2035 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2036 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2037 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2038 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2039 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2040 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2041 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2042 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2043 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2044 ctx->apply_left = PETSC_TRUE; 2045 ctx->apply_right = PETSC_FALSE; 2046 ctx->apply_p0 = PETSC_FALSE; 2047 ctx->benign_n = pcbddc->benign_n; 2048 if (reuse) { 2049 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2050 ctx->free = PETSC_FALSE; 2051 } else { /* TODO: could be optimized for successive solves */ 2052 ISLocalToGlobalMapping N_to_D; 2053 PetscInt i; 2054 2055 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2056 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2057 for (i=0;i<pcbddc->benign_n;i++) { 2058 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2059 } 2060 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2061 ctx->free = PETSC_TRUE; 2062 } 2063 ctx->A = pcis->A_IB; 2064 ctx->work = work; 2065 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2066 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2067 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2068 pcis->A_IB = A_IB; 2069 2070 /* A_BI as A_IB^T */ 2071 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2072 pcbddc->benign_original_mat = pcis->A_BI; 2073 pcis->A_BI = A_BI; 2074 } else { 2075 if (!pcbddc->benign_original_mat) { 2076 PetscFunctionReturn(0); 2077 } 2078 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2079 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2080 pcis->A_IB = ctx->A; 2081 ctx->A = NULL; 2082 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2083 pcis->A_BI = pcbddc->benign_original_mat; 2084 pcbddc->benign_original_mat = NULL; 2085 if (ctx->free) { 2086 PetscInt i; 2087 for (i=0;i<ctx->benign_n;i++) { 2088 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2089 } 2090 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2091 } 2092 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2093 ierr = PetscFree(ctx);CHKERRQ(ierr); 2094 } 2095 PetscFunctionReturn(0); 2096 } 2097 2098 /* used just in bddc debug mode */ 2099 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2100 { 2101 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2102 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2103 Mat An; 2104 PetscErrorCode ierr; 2105 2106 PetscFunctionBegin; 2107 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2108 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2109 if (is1) { 2110 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2111 ierr = MatDestroy(&An);CHKERRQ(ierr); 2112 } else { 2113 *B = An; 2114 } 2115 PetscFunctionReturn(0); 2116 } 2117 2118 /* TODO: add reuse flag */ 2119 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2120 { 2121 Mat Bt; 2122 PetscScalar *a,*bdata; 2123 const PetscInt *ii,*ij; 2124 PetscInt m,n,i,nnz,*bii,*bij; 2125 PetscBool flg_row; 2126 PetscErrorCode ierr; 2127 2128 PetscFunctionBegin; 2129 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2130 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2131 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2132 nnz = n; 2133 for (i=0;i<ii[n];i++) { 2134 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2135 } 2136 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2137 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2138 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2139 nnz = 0; 2140 bii[0] = 0; 2141 for (i=0;i<n;i++) { 2142 PetscInt j; 2143 for (j=ii[i];j<ii[i+1];j++) { 2144 PetscScalar entry = a[j]; 2145 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || (n == m && ij[j] == i)) { 2146 bij[nnz] = ij[j]; 2147 bdata[nnz] = entry; 2148 nnz++; 2149 } 2150 } 2151 bii[i+1] = nnz; 2152 } 2153 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2154 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2155 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2156 { 2157 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2158 b->free_a = PETSC_TRUE; 2159 b->free_ij = PETSC_TRUE; 2160 } 2161 if (*B == A) { 2162 ierr = MatDestroy(&A);CHKERRQ(ierr); 2163 } 2164 *B = Bt; 2165 PetscFunctionReturn(0); 2166 } 2167 2168 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2169 { 2170 Mat B = NULL; 2171 DM dm; 2172 IS is_dummy,*cc_n; 2173 ISLocalToGlobalMapping l2gmap_dummy; 2174 PCBDDCGraph graph; 2175 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2176 PetscInt i,n; 2177 PetscInt *xadj,*adjncy; 2178 PetscBool isplex = PETSC_FALSE; 2179 PetscErrorCode ierr; 2180 2181 PetscFunctionBegin; 2182 if (ncc) *ncc = 0; 2183 if (cc) *cc = NULL; 2184 if (primalv) *primalv = NULL; 2185 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2186 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2187 if (!dm) { 2188 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2189 } 2190 if (dm) { 2191 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2192 } 2193 if (isplex) { /* this code has been modified from plexpartition.c */ 2194 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2195 PetscInt *adj = NULL; 2196 IS cellNumbering; 2197 const PetscInt *cellNum; 2198 PetscBool useCone, useClosure; 2199 PetscSection section; 2200 PetscSegBuffer adjBuffer; 2201 PetscSF sfPoint; 2202 PetscErrorCode ierr; 2203 2204 PetscFunctionBegin; 2205 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2206 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2207 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2208 /* Build adjacency graph via a section/segbuffer */ 2209 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2210 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2211 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2212 /* Always use FVM adjacency to create partitioner graph */ 2213 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2214 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2215 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2216 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2217 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2218 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2219 for (n = 0, p = pStart; p < pEnd; p++) { 2220 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2221 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2222 adjSize = PETSC_DETERMINE; 2223 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2224 for (a = 0; a < adjSize; ++a) { 2225 const PetscInt point = adj[a]; 2226 if (pStart <= point && point < pEnd) { 2227 PetscInt *PETSC_RESTRICT pBuf; 2228 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2229 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2230 *pBuf = point; 2231 } 2232 } 2233 n++; 2234 } 2235 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2236 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2237 /* Derive CSR graph from section/segbuffer */ 2238 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2239 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2240 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2241 for (idx = 0, p = pStart; p < pEnd; p++) { 2242 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2243 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2244 } 2245 xadj[n] = size; 2246 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2247 /* Clean up */ 2248 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2249 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2250 ierr = PetscFree(adj);CHKERRQ(ierr); 2251 graph->xadj = xadj; 2252 graph->adjncy = adjncy; 2253 } else { 2254 Mat A; 2255 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2256 2257 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2258 if (!A->rmap->N || !A->cmap->N) { 2259 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2260 PetscFunctionReturn(0); 2261 } 2262 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2263 if (!isseqaij && filter) { 2264 PetscBool isseqdense; 2265 2266 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2267 if (!isseqdense) { 2268 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2269 } else { /* TODO: rectangular case and LDA */ 2270 PetscScalar *array; 2271 PetscReal chop=1.e-6; 2272 2273 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2274 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2275 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2276 for (i=0;i<n;i++) { 2277 PetscInt j; 2278 for (j=i+1;j<n;j++) { 2279 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2280 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2281 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2282 } 2283 } 2284 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2285 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2286 } 2287 } else { 2288 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2289 B = A; 2290 } 2291 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2292 2293 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2294 if (filter) { 2295 PetscScalar *data; 2296 PetscInt j,cum; 2297 2298 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2299 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2300 cum = 0; 2301 for (i=0;i<n;i++) { 2302 PetscInt t; 2303 2304 for (j=xadj[i];j<xadj[i+1];j++) { 2305 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2306 continue; 2307 } 2308 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2309 } 2310 t = xadj_filtered[i]; 2311 xadj_filtered[i] = cum; 2312 cum += t; 2313 } 2314 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2315 graph->xadj = xadj_filtered; 2316 graph->adjncy = adjncy_filtered; 2317 } else { 2318 graph->xadj = xadj; 2319 graph->adjncy = adjncy; 2320 } 2321 } 2322 /* compute local connected components using PCBDDCGraph */ 2323 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2324 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2325 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2326 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2327 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2328 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2329 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2330 2331 /* partial clean up */ 2332 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2333 if (B) { 2334 PetscBool flg_row; 2335 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2336 ierr = MatDestroy(&B);CHKERRQ(ierr); 2337 } 2338 if (isplex) { 2339 ierr = PetscFree(xadj);CHKERRQ(ierr); 2340 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2341 } 2342 2343 /* get back data */ 2344 if (isplex) { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc || primalv) { 2347 Mat A; 2348 PetscBT btv,btvt; 2349 PetscSection subSection; 2350 PetscInt *ids,cum,cump,*cids,*pids; 2351 2352 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2353 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2354 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2355 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2356 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2357 2358 cids[0] = 0; 2359 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2360 PetscInt j; 2361 2362 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2363 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2364 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2365 2366 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2367 for (k = 0; k < 2*size; k += 2) { 2368 PetscInt s, p = closure[k], off, dof, cdof; 2369 2370 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2371 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2372 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2373 for (s = 0; s < dof-cdof; s++) { 2374 if (PetscBTLookupSet(btvt,off+s)) continue; 2375 if (!PetscBTLookup(btv,off+s)) { 2376 ids[cum++] = off+s; 2377 } else { /* cross-vertex */ 2378 pids[cump++] = off+s; 2379 } 2380 } 2381 } 2382 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2383 } 2384 cids[i+1] = cum; 2385 /* mark dofs as already assigned */ 2386 for (j = cids[i]; j < cids[i+1]; j++) { 2387 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2388 } 2389 } 2390 if (cc) { 2391 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2392 for (i = 0; i < graph->ncc; i++) { 2393 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2394 } 2395 *cc = cc_n; 2396 } 2397 if (primalv) { 2398 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2399 } 2400 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2401 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2402 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2403 } 2404 } else { 2405 if (ncc) *ncc = graph->ncc; 2406 if (cc) { 2407 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2408 for (i=0;i<graph->ncc;i++) { 2409 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2410 } 2411 *cc = cc_n; 2412 } 2413 } 2414 /* clean up graph */ 2415 graph->xadj = 0; 2416 graph->adjncy = 0; 2417 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2418 PetscFunctionReturn(0); 2419 } 2420 2421 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2422 { 2423 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2424 PC_IS* pcis = (PC_IS*)(pc->data); 2425 IS dirIS = NULL; 2426 PetscInt i; 2427 PetscErrorCode ierr; 2428 2429 PetscFunctionBegin; 2430 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2431 if (zerodiag) { 2432 Mat A; 2433 Vec vec3_N; 2434 PetscScalar *vals; 2435 const PetscInt *idxs; 2436 PetscInt nz,*count; 2437 2438 /* p0 */ 2439 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2440 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2441 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2442 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2443 for (i=0;i<nz;i++) vals[i] = 1.; 2444 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2445 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2446 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2447 /* v_I */ 2448 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2449 for (i=0;i<nz;i++) vals[i] = 0.; 2450 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2453 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2454 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2455 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2456 if (dirIS) { 2457 PetscInt n; 2458 2459 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2460 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2461 for (i=0;i<n;i++) vals[i] = 0.; 2462 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2463 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2464 } 2465 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2466 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2467 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2468 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2469 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2470 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2471 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2472 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2473 ierr = PetscFree(vals);CHKERRQ(ierr); 2474 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2475 2476 /* there should not be any pressure dofs lying on the interface */ 2477 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2478 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2479 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2480 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2481 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2482 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2483 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2484 ierr = PetscFree(count);CHKERRQ(ierr); 2485 } 2486 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2487 2488 /* check PCBDDCBenignGetOrSetP0 */ 2489 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2490 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2491 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2492 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2493 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2494 for (i=0;i<pcbddc->benign_n;i++) { 2495 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2496 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2497 } 2498 PetscFunctionReturn(0); 2499 } 2500 2501 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2502 { 2503 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2504 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2505 PetscInt nz,n; 2506 PetscInt *interior_dofs,n_interior_dofs,nneu; 2507 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2508 PetscErrorCode ierr; 2509 2510 PetscFunctionBegin; 2511 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2512 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2513 for (n=0;n<pcbddc->benign_n;n++) { 2514 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2515 } 2516 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2517 pcbddc->benign_n = 0; 2518 2519 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2520 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2521 Checks if all the pressure dofs in each subdomain have a zero diagonal 2522 If not, a change of basis on pressures is not needed 2523 since the local Schur complements are already SPD 2524 */ 2525 has_null_pressures = PETSC_TRUE; 2526 have_null = PETSC_TRUE; 2527 if (pcbddc->n_ISForDofsLocal) { 2528 IS iP = NULL; 2529 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2530 2531 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2532 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2533 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2534 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2535 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2536 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2537 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2538 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2539 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2540 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2541 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2542 if (iP) { 2543 IS newpressures; 2544 2545 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2546 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2547 pressures = newpressures; 2548 } 2549 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2550 if (!sorted) { 2551 ierr = ISSort(pressures);CHKERRQ(ierr); 2552 } 2553 } else { 2554 pressures = NULL; 2555 } 2556 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2557 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2558 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2559 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2560 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2561 if (!sorted) { 2562 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2563 } 2564 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2565 zerodiag_save = zerodiag; 2566 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2567 if (!nz) { 2568 if (n) have_null = PETSC_FALSE; 2569 has_null_pressures = PETSC_FALSE; 2570 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2571 } 2572 recompute_zerodiag = PETSC_FALSE; 2573 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2574 zerodiag_subs = NULL; 2575 pcbddc->benign_n = 0; 2576 n_interior_dofs = 0; 2577 interior_dofs = NULL; 2578 nneu = 0; 2579 if (pcbddc->NeumannBoundariesLocal) { 2580 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2581 } 2582 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2583 if (checkb) { /* need to compute interior nodes */ 2584 PetscInt n,i,j; 2585 PetscInt n_neigh,*neigh,*n_shared,**shared; 2586 PetscInt *iwork; 2587 2588 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2589 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2590 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2591 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2592 for (i=1;i<n_neigh;i++) 2593 for (j=0;j<n_shared[i];j++) 2594 iwork[shared[i][j]] += 1; 2595 for (i=0;i<n;i++) 2596 if (!iwork[i]) 2597 interior_dofs[n_interior_dofs++] = i; 2598 ierr = PetscFree(iwork);CHKERRQ(ierr); 2599 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2600 } 2601 if (has_null_pressures) { 2602 IS *subs; 2603 PetscInt nsubs,i,j,nl; 2604 const PetscInt *idxs; 2605 PetscScalar *array; 2606 Vec *work; 2607 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2608 2609 subs = pcbddc->local_subs; 2610 nsubs = pcbddc->n_local_subs; 2611 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2612 if (checkb) { 2613 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2614 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2615 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2616 /* work[0] = 1_p */ 2617 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2618 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2619 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2620 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2621 /* work[0] = 1_v */ 2622 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2623 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2624 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2625 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2626 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2627 } 2628 if (nsubs > 1) { 2629 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2630 for (i=0;i<nsubs;i++) { 2631 ISLocalToGlobalMapping l2g; 2632 IS t_zerodiag_subs; 2633 PetscInt nl; 2634 2635 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2636 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2637 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2638 if (nl) { 2639 PetscBool valid = PETSC_TRUE; 2640 2641 if (checkb) { 2642 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2643 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2644 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2645 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2646 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2647 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2648 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2649 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2650 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2651 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2652 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2653 for (j=0;j<n_interior_dofs;j++) { 2654 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2655 valid = PETSC_FALSE; 2656 break; 2657 } 2658 } 2659 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2660 } 2661 if (valid && nneu) { 2662 const PetscInt *idxs; 2663 PetscInt nzb; 2664 2665 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2666 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2667 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2668 if (nzb) valid = PETSC_FALSE; 2669 } 2670 if (valid && pressures) { 2671 IS t_pressure_subs; 2672 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2673 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2674 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2675 } 2676 if (valid) { 2677 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2678 pcbddc->benign_n++; 2679 } else { 2680 recompute_zerodiag = PETSC_TRUE; 2681 } 2682 } 2683 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2684 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2685 } 2686 } else { /* there's just one subdomain (or zero if they have not been detected */ 2687 PetscBool valid = PETSC_TRUE; 2688 2689 if (nneu) valid = PETSC_FALSE; 2690 if (valid && pressures) { 2691 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2692 } 2693 if (valid && checkb) { 2694 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2695 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2696 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2697 for (j=0;j<n_interior_dofs;j++) { 2698 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2699 valid = PETSC_FALSE; 2700 break; 2701 } 2702 } 2703 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2704 } 2705 if (valid) { 2706 pcbddc->benign_n = 1; 2707 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2708 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2709 zerodiag_subs[0] = zerodiag; 2710 } 2711 } 2712 if (checkb) { 2713 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2714 } 2715 } 2716 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2717 2718 if (!pcbddc->benign_n) { 2719 PetscInt n; 2720 2721 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2722 recompute_zerodiag = PETSC_FALSE; 2723 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2724 if (n) { 2725 has_null_pressures = PETSC_FALSE; 2726 have_null = PETSC_FALSE; 2727 } 2728 } 2729 2730 /* final check for null pressures */ 2731 if (zerodiag && pressures) { 2732 PetscInt nz,np; 2733 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2734 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2735 if (nz != np) have_null = PETSC_FALSE; 2736 } 2737 2738 if (recompute_zerodiag) { 2739 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2740 if (pcbddc->benign_n == 1) { 2741 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2742 zerodiag = zerodiag_subs[0]; 2743 } else { 2744 PetscInt i,nzn,*new_idxs; 2745 2746 nzn = 0; 2747 for (i=0;i<pcbddc->benign_n;i++) { 2748 PetscInt ns; 2749 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2750 nzn += ns; 2751 } 2752 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2753 nzn = 0; 2754 for (i=0;i<pcbddc->benign_n;i++) { 2755 PetscInt ns,*idxs; 2756 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2757 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2758 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2759 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2760 nzn += ns; 2761 } 2762 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2763 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2764 } 2765 have_null = PETSC_FALSE; 2766 } 2767 2768 /* Prepare matrix to compute no-net-flux */ 2769 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2770 Mat A,loc_divudotp; 2771 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2772 IS row,col,isused = NULL; 2773 PetscInt M,N,n,st,n_isused; 2774 2775 if (pressures) { 2776 isused = pressures; 2777 } else { 2778 isused = zerodiag_save; 2779 } 2780 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2781 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2782 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2783 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2784 n_isused = 0; 2785 if (isused) { 2786 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2787 } 2788 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2789 st = st-n_isused; 2790 if (n) { 2791 const PetscInt *gidxs; 2792 2793 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2794 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2795 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2796 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2797 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2798 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2799 } else { 2800 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2801 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2802 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2803 } 2804 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2805 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2806 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2807 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2808 ierr = ISDestroy(&row);CHKERRQ(ierr); 2809 ierr = ISDestroy(&col);CHKERRQ(ierr); 2810 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2811 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2812 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2813 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2814 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2815 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2816 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2817 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2818 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2819 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2820 } 2821 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2822 2823 /* change of basis and p0 dofs */ 2824 if (has_null_pressures) { 2825 IS zerodiagc; 2826 const PetscInt *idxs,*idxsc; 2827 PetscInt i,s,*nnz; 2828 2829 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2830 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2831 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2832 /* local change of basis for pressures */ 2833 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2834 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2835 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2836 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2837 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2838 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2839 for (i=0;i<pcbddc->benign_n;i++) { 2840 PetscInt nzs,j; 2841 2842 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2843 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2844 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2845 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2846 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2847 } 2848 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2849 ierr = PetscFree(nnz);CHKERRQ(ierr); 2850 /* set identity on velocities */ 2851 for (i=0;i<n-nz;i++) { 2852 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2853 } 2854 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2855 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2856 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2857 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2858 /* set change on pressures */ 2859 for (s=0;s<pcbddc->benign_n;s++) { 2860 PetscScalar *array; 2861 PetscInt nzs; 2862 2863 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2864 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2865 for (i=0;i<nzs-1;i++) { 2866 PetscScalar vals[2]; 2867 PetscInt cols[2]; 2868 2869 cols[0] = idxs[i]; 2870 cols[1] = idxs[nzs-1]; 2871 vals[0] = 1.; 2872 vals[1] = 1.; 2873 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2874 } 2875 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2876 for (i=0;i<nzs-1;i++) array[i] = -1.; 2877 array[nzs-1] = 1.; 2878 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2879 /* store local idxs for p0 */ 2880 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2881 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2882 ierr = PetscFree(array);CHKERRQ(ierr); 2883 } 2884 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2885 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2886 /* project if needed */ 2887 if (pcbddc->benign_change_explicit) { 2888 Mat M; 2889 2890 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2891 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2892 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2893 ierr = MatDestroy(&M);CHKERRQ(ierr); 2894 } 2895 /* store global idxs for p0 */ 2896 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2897 } 2898 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2899 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2900 2901 /* determines if the coarse solver will be singular or not */ 2902 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2903 /* determines if the problem has subdomains with 0 pressure block */ 2904 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2905 *zerodiaglocal = zerodiag; 2906 PetscFunctionReturn(0); 2907 } 2908 2909 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2910 { 2911 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2912 PetscScalar *array; 2913 PetscErrorCode ierr; 2914 2915 PetscFunctionBegin; 2916 if (!pcbddc->benign_sf) { 2917 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2918 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2919 } 2920 if (get) { 2921 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2922 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2923 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2924 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2925 } else { 2926 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2927 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2928 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2929 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2930 } 2931 PetscFunctionReturn(0); 2932 } 2933 2934 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2935 { 2936 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2937 PetscErrorCode ierr; 2938 2939 PetscFunctionBegin; 2940 /* TODO: add error checking 2941 - avoid nested pop (or push) calls. 2942 - cannot push before pop. 2943 - cannot call this if pcbddc->local_mat is NULL 2944 */ 2945 if (!pcbddc->benign_n) { 2946 PetscFunctionReturn(0); 2947 } 2948 if (pop) { 2949 if (pcbddc->benign_change_explicit) { 2950 IS is_p0; 2951 MatReuse reuse; 2952 2953 /* extract B_0 */ 2954 reuse = MAT_INITIAL_MATRIX; 2955 if (pcbddc->benign_B0) { 2956 reuse = MAT_REUSE_MATRIX; 2957 } 2958 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2959 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2960 /* remove rows and cols from local problem */ 2961 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2962 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2963 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2964 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2965 } else { 2966 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2967 PetscScalar *vals; 2968 PetscInt i,n,*idxs_ins; 2969 2970 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2971 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2972 if (!pcbddc->benign_B0) { 2973 PetscInt *nnz; 2974 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2975 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2976 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2977 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2978 for (i=0;i<pcbddc->benign_n;i++) { 2979 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2980 nnz[i] = n - nnz[i]; 2981 } 2982 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2983 ierr = PetscFree(nnz);CHKERRQ(ierr); 2984 } 2985 2986 for (i=0;i<pcbddc->benign_n;i++) { 2987 PetscScalar *array; 2988 PetscInt *idxs,j,nz,cum; 2989 2990 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2991 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2992 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2993 for (j=0;j<nz;j++) vals[j] = 1.; 2994 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2996 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2997 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2998 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2999 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 3000 cum = 0; 3001 for (j=0;j<n;j++) { 3002 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 3003 vals[cum] = array[j]; 3004 idxs_ins[cum] = j; 3005 cum++; 3006 } 3007 } 3008 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 3009 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 3010 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 3011 } 3012 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3013 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3014 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 3015 } 3016 } else { /* push */ 3017 if (pcbddc->benign_change_explicit) { 3018 PetscInt i; 3019 3020 for (i=0;i<pcbddc->benign_n;i++) { 3021 PetscScalar *B0_vals; 3022 PetscInt *B0_cols,B0_ncol; 3023 3024 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3025 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3026 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 3027 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 3028 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 3029 } 3030 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3031 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3032 } else { 3033 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3034 } 3035 } 3036 PetscFunctionReturn(0); 3037 } 3038 3039 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3040 { 3041 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3042 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3043 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3044 PetscBLASInt *B_iwork,*B_ifail; 3045 PetscScalar *work,lwork; 3046 PetscScalar *St,*S,*eigv; 3047 PetscScalar *Sarray,*Starray; 3048 PetscReal *eigs,thresh,lthresh,uthresh; 3049 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3050 PetscBool allocated_S_St; 3051 #if defined(PETSC_USE_COMPLEX) 3052 PetscReal *rwork; 3053 #endif 3054 PetscErrorCode ierr; 3055 3056 PetscFunctionBegin; 3057 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3058 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3059 if (sub_schurs->n_subs && (!sub_schurs->is_symmetric)) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for this matrix pencil (herm %d, symm %d, posdef %d)",sub_schurs->is_hermitian,sub_schurs->is_symmetric,sub_schurs->is_posdef); 3060 3061 if (pcbddc->dbg_flag) { 3062 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3063 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3064 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3065 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3066 } 3067 3068 if (pcbddc->dbg_flag) { 3069 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3070 } 3071 3072 /* max size of subsets */ 3073 mss = 0; 3074 for (i=0;i<sub_schurs->n_subs;i++) { 3075 PetscInt subset_size; 3076 3077 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3078 mss = PetscMax(mss,subset_size); 3079 } 3080 3081 /* min/max and threshold */ 3082 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3083 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3084 nmax = PetscMax(nmin,nmax); 3085 allocated_S_St = PETSC_FALSE; 3086 if (nmin || !sub_schurs->is_posdef) { /* XXX */ 3087 allocated_S_St = PETSC_TRUE; 3088 } 3089 3090 /* allocate lapack workspace */ 3091 cum = cum2 = 0; 3092 maxneigs = 0; 3093 for (i=0;i<sub_schurs->n_subs;i++) { 3094 PetscInt n,subset_size; 3095 3096 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3097 n = PetscMin(subset_size,nmax); 3098 cum += subset_size; 3099 cum2 += subset_size*n; 3100 maxneigs = PetscMax(maxneigs,n); 3101 } 3102 if (mss) { 3103 if (sub_schurs->is_symmetric) { 3104 PetscBLASInt B_itype = 1; 3105 PetscBLASInt B_N = mss; 3106 PetscReal zero = 0.0; 3107 PetscReal eps = 0.0; /* dlamch? */ 3108 3109 B_lwork = -1; 3110 S = NULL; 3111 St = NULL; 3112 eigs = NULL; 3113 eigv = NULL; 3114 B_iwork = NULL; 3115 B_ifail = NULL; 3116 #if defined(PETSC_USE_COMPLEX) 3117 rwork = NULL; 3118 #endif 3119 thresh = 1.0; 3120 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3121 #if defined(PETSC_USE_COMPLEX) 3122 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3123 #else 3124 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3125 #endif 3126 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3127 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3128 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3129 } else { 3130 lwork = 0; 3131 } 3132 3133 nv = 0; 3134 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3135 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3136 } 3137 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3138 if (allocated_S_St) { 3139 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3140 } 3141 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3142 #if defined(PETSC_USE_COMPLEX) 3143 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3144 #endif 3145 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3146 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3147 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3148 nv+cum,&pcbddc->adaptive_constraints_idxs, 3149 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3150 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3151 3152 maxneigs = 0; 3153 cum = cumarray = 0; 3154 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3155 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3156 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3157 const PetscInt *idxs; 3158 3159 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3160 for (cum=0;cum<nv;cum++) { 3161 pcbddc->adaptive_constraints_n[cum] = 1; 3162 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3163 pcbddc->adaptive_constraints_data[cum] = 1.0; 3164 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3165 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3166 } 3167 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3168 } 3169 3170 if (mss) { /* multilevel */ 3171 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3172 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3173 } 3174 3175 lthresh = pcbddc->adaptive_threshold[0]; 3176 uthresh = pcbddc->adaptive_threshold[1]; 3177 for (i=0;i<sub_schurs->n_subs;i++) { 3178 const PetscInt *idxs; 3179 PetscReal upper,lower; 3180 PetscInt j,subset_size,eigs_start = 0; 3181 PetscBLASInt B_N; 3182 PetscBool same_data = PETSC_FALSE; 3183 PetscBool scal = PETSC_FALSE; 3184 3185 if (pcbddc->use_deluxe_scaling) { 3186 upper = PETSC_MAX_REAL; 3187 lower = uthresh; 3188 } else { 3189 if (!sub_schurs->is_posdef) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented without deluxe scaling"); 3190 upper = 1./uthresh; 3191 lower = 0.; 3192 } 3193 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3194 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3195 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3196 /* this is experimental: we assume the dofs have been properly grouped to have 3197 the diagonal blocks Schur complements either positive or negative definite (true for Stokes) */ 3198 if (!sub_schurs->is_posdef) { 3199 Mat T; 3200 3201 for (j=0;j<subset_size;j++) { 3202 if (PetscRealPart(*(Sarray+cumarray+j*(subset_size+1))) < 0.0) { 3203 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Sarray+cumarray,&T);CHKERRQ(ierr); 3204 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3205 ierr = MatDestroy(&T);CHKERRQ(ierr); 3206 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,subset_size,Starray+cumarray,&T);CHKERRQ(ierr); 3207 ierr = MatScale(T,-1.0);CHKERRQ(ierr); 3208 ierr = MatDestroy(&T);CHKERRQ(ierr); 3209 if (sub_schurs->change_primal_sub) { 3210 PetscInt nz,k; 3211 const PetscInt *idxs; 3212 3213 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nz);CHKERRQ(ierr); 3214 ierr = ISGetIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3215 for (k=0;k<nz;k++) { 3216 *( Sarray + cumarray + idxs[k]*(subset_size+1)) *= -1.0; 3217 *(Starray + cumarray + idxs[k]*(subset_size+1)) = 0.0; 3218 } 3219 ierr = ISRestoreIndices(sub_schurs->change_primal_sub[i],&idxs);CHKERRQ(ierr); 3220 } 3221 scal = PETSC_TRUE; 3222 break; 3223 } 3224 } 3225 } 3226 3227 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3228 if (sub_schurs->is_symmetric) { 3229 PetscInt j,k; 3230 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3231 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3232 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3233 } 3234 for (j=0;j<subset_size;j++) { 3235 for (k=j;k<subset_size;k++) { 3236 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3237 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3238 } 3239 } 3240 } else { 3241 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3242 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3243 } 3244 } else { 3245 S = Sarray + cumarray; 3246 St = Starray + cumarray; 3247 } 3248 /* see if we can save some work */ 3249 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3250 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3251 } 3252 3253 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3254 B_neigs = 0; 3255 } else { 3256 if (sub_schurs->is_symmetric) { 3257 PetscBLASInt B_itype = 1; 3258 PetscBLASInt B_IL, B_IU; 3259 PetscReal eps = -1.0; /* dlamch? */ 3260 PetscInt nmin_s; 3261 PetscBool compute_range; 3262 3263 B_neigs = 0; 3264 compute_range = (PetscBool)!same_data; 3265 if (nmin >= subset_size) compute_range = PETSC_FALSE; 3266 3267 if (pcbddc->dbg_flag) { 3268 PetscInt nc = 0; 3269 3270 if (sub_schurs->change_primal_sub) { 3271 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nc);CHKERRQ(ierr); 3272 } 3273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d (range %d) (change %d).\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]],compute_range,nc);CHKERRQ(ierr); 3274 } 3275 3276 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3277 if (compute_range) { 3278 3279 /* ask for eigenvalues larger than thresh */ 3280 if (sub_schurs->is_posdef) { 3281 #if defined(PETSC_USE_COMPLEX) 3282 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3283 #else 3284 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3285 #endif 3286 } else { /* no theory so far, but it works nicely */ 3287 PetscInt recipe = 0,recipe_m = 1; 3288 PetscReal bb[2]; 3289 3290 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe",&recipe,NULL);CHKERRQ(ierr); 3291 switch (recipe) { 3292 case 0: 3293 if (scal) { bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; } 3294 else { bb[0] = uthresh; bb[1] = PETSC_MAX_REAL; } 3295 #if defined(PETSC_USE_COMPLEX) 3296 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3297 #else 3298 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3299 #endif 3300 break; 3301 case 1: 3302 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh*lthresh; 3303 #if defined(PETSC_USE_COMPLEX) 3304 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3305 #else 3306 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3307 #endif 3308 if (!scal) { 3309 PetscBLASInt B_neigs2 = 0; 3310 3311 bb[0] = PetscMax(lthresh*lthresh,uthresh); bb[1] = PETSC_MAX_REAL; 3312 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3313 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3314 #if defined(PETSC_USE_COMPLEX) 3315 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3316 #else 3317 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3318 #endif 3319 B_neigs += B_neigs2; 3320 } 3321 break; 3322 case 2: 3323 if (scal) { 3324 bb[0] = PETSC_MIN_REAL; 3325 bb[1] = 0; 3326 #if defined(PETSC_USE_COMPLEX) 3327 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3328 #else 3329 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3330 #endif 3331 } else { 3332 PetscBLASInt B_neigs2 = 0; 3333 PetscBool import = PETSC_FALSE; 3334 3335 lthresh = PetscMax(lthresh,0.0); 3336 if (lthresh > 0.0) { 3337 bb[0] = PETSC_MIN_REAL; 3338 bb[1] = lthresh*lthresh; 3339 3340 import = PETSC_TRUE; 3341 #if defined(PETSC_USE_COMPLEX) 3342 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3343 #else 3344 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3345 #endif 3346 } 3347 bb[0] = PetscMax(lthresh*lthresh,uthresh); 3348 bb[1] = PETSC_MAX_REAL; 3349 if (import) { 3350 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3351 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3352 } 3353 #if defined(PETSC_USE_COMPLEX) 3354 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3355 #else 3356 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3357 #endif 3358 B_neigs += B_neigs2; 3359 } 3360 break; 3361 case 3: 3362 if (scal) { 3363 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min_scal",&recipe_m,NULL);CHKERRQ(ierr); 3364 } else { 3365 ierr = PetscOptionsGetInt(NULL,((PetscObject)pc)->prefix,"-pc_bddc_adaptive_recipe3_min",&recipe_m,NULL);CHKERRQ(ierr); 3366 } 3367 if (!scal) { 3368 bb[0] = uthresh; 3369 bb[1] = PETSC_MAX_REAL; 3370 #if defined(PETSC_USE_COMPLEX) 3371 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3372 #else 3373 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3374 #endif 3375 } 3376 if (recipe_m > 0 && B_N - B_neigs > 0) { 3377 PetscBLASInt B_neigs2 = 0; 3378 3379 B_IL = 1; 3380 ierr = PetscBLASIntCast(PetscMin(recipe_m,B_N - B_neigs),&B_IU);CHKERRQ(ierr); 3381 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3382 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3383 #if defined(PETSC_USE_COMPLEX) 3384 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3385 #else 3386 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3387 #endif 3388 B_neigs += B_neigs2; 3389 } 3390 break; 3391 case 4: 3392 bb[0] = PETSC_MIN_REAL; bb[1] = lthresh; 3393 #if defined(PETSC_USE_COMPLEX) 3394 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3395 #else 3396 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3397 #endif 3398 { 3399 PetscBLASInt B_neigs2 = 0; 3400 3401 bb[0] = PetscMax(lthresh+PETSC_SMALL,uthresh); bb[1] = PETSC_MAX_REAL; 3402 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3403 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3404 #if defined(PETSC_USE_COMPLEX) 3405 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3406 #else 3407 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&bb[0],&bb[1],&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*B_N,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3408 #endif 3409 B_neigs += B_neigs2; 3410 } 3411 break; 3412 default: 3413 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Unknown recipe %D",recipe); 3414 break; 3415 } 3416 } 3417 } else if (!same_data) { /* this is just to see all the eigenvalues */ 3418 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3419 B_IL = 1; 3420 #if defined(PETSC_USE_COMPLEX) 3421 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3422 #else 3423 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3424 #endif 3425 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3426 PetscInt k; 3427 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3428 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3429 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3430 nmin = nmax; 3431 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3432 for (k=0;k<nmax;k++) { 3433 eigs[k] = 1./PETSC_SMALL; 3434 eigv[k*(subset_size+1)] = 1.0; 3435 } 3436 } 3437 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3438 if (B_ierr) { 3439 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3440 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3441 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3442 } 3443 3444 if (B_neigs > nmax) { 3445 if (pcbddc->dbg_flag) { 3446 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax);CHKERRQ(ierr); 3447 } 3448 if (pcbddc->use_deluxe_scaling) eigs_start = scal ? 0 : B_neigs-nmax; 3449 B_neigs = nmax; 3450 } 3451 3452 nmin_s = PetscMin(nmin,B_N); 3453 if (B_neigs < nmin_s) { 3454 PetscBLASInt B_neigs2 = 0; 3455 3456 if (pcbddc->use_deluxe_scaling) { 3457 if (scal) { 3458 B_IU = nmin_s; 3459 B_IL = B_neigs + 1; 3460 } else { 3461 B_IL = B_N - nmin_s + 1; 3462 B_IU = B_N - B_neigs; 3463 } 3464 } else { 3465 B_IL = B_neigs + 1; 3466 B_IU = nmin_s; 3467 } 3468 if (pcbddc->dbg_flag) { 3469 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3470 } 3471 if (sub_schurs->is_symmetric) { 3472 PetscInt j,k; 3473 for (j=0;j<subset_size;j++) { 3474 for (k=j;k<subset_size;k++) { 3475 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3476 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3477 } 3478 } 3479 } else { 3480 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3481 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3482 } 3483 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3484 #if defined(PETSC_USE_COMPLEX) 3485 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3486 #else 3487 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3488 #endif 3489 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3490 B_neigs += B_neigs2; 3491 } 3492 if (B_ierr) { 3493 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3494 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3495 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3496 } 3497 if (pcbddc->dbg_flag) { 3498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3499 for (j=0;j<B_neigs;j++) { 3500 if (eigs[j] == 0.0) { 3501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3502 } else { 3503 if (pcbddc->use_deluxe_scaling) { 3504 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3505 } else { 3506 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3507 } 3508 } 3509 } 3510 } 3511 } else SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented"); 3512 } 3513 /* change the basis back to the original one */ 3514 if (sub_schurs->change) { 3515 Mat change,phi,phit; 3516 3517 if (pcbddc->dbg_flag > 2) { 3518 PetscInt ii; 3519 for (ii=0;ii<B_neigs;ii++) { 3520 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3521 for (j=0;j<B_N;j++) { 3522 #if defined(PETSC_USE_COMPLEX) 3523 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3524 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3525 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3526 #else 3527 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3528 #endif 3529 } 3530 } 3531 } 3532 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3533 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3534 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3535 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3536 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3537 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3538 } 3539 maxneigs = PetscMax(B_neigs,maxneigs); 3540 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3541 if (B_neigs) { 3542 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3543 3544 if (pcbddc->dbg_flag > 1) { 3545 PetscInt ii; 3546 for (ii=0;ii<B_neigs;ii++) { 3547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3548 for (j=0;j<B_N;j++) { 3549 #if defined(PETSC_USE_COMPLEX) 3550 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3551 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3552 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3553 #else 3554 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3555 #endif 3556 } 3557 } 3558 } 3559 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3560 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3561 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3562 cum++; 3563 } 3564 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3565 /* shift for next computation */ 3566 cumarray += subset_size*subset_size; 3567 } 3568 if (pcbddc->dbg_flag) { 3569 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3570 } 3571 3572 if (mss) { 3573 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3574 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3575 /* destroy matrices (junk) */ 3576 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3577 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3578 } 3579 if (allocated_S_St) { 3580 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3581 } 3582 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3583 #if defined(PETSC_USE_COMPLEX) 3584 ierr = PetscFree(rwork);CHKERRQ(ierr); 3585 #endif 3586 if (pcbddc->dbg_flag) { 3587 PetscInt maxneigs_r; 3588 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3589 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3590 } 3591 PetscFunctionReturn(0); 3592 } 3593 3594 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3595 { 3596 PetscScalar *coarse_submat_vals; 3597 PetscErrorCode ierr; 3598 3599 PetscFunctionBegin; 3600 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3601 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3602 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3603 3604 /* Setup local neumann solver ksp_R */ 3605 /* PCBDDCSetUpLocalScatters should be called first! */ 3606 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3607 3608 /* 3609 Setup local correction and local part of coarse basis. 3610 Gives back the dense local part of the coarse matrix in column major ordering 3611 */ 3612 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3613 3614 /* Compute total number of coarse nodes and setup coarse solver */ 3615 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3616 3617 /* free */ 3618 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3619 PetscFunctionReturn(0); 3620 } 3621 3622 PetscErrorCode PCBDDCResetCustomization(PC pc) 3623 { 3624 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3625 PetscErrorCode ierr; 3626 3627 PetscFunctionBegin; 3628 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3629 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3630 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3631 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3632 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3633 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3634 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3635 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3636 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3637 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3638 PetscFunctionReturn(0); 3639 } 3640 3641 PetscErrorCode PCBDDCResetTopography(PC pc) 3642 { 3643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3644 PetscInt i; 3645 PetscErrorCode ierr; 3646 3647 PetscFunctionBegin; 3648 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3649 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3650 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3651 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3652 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3653 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3654 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3655 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3656 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3657 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3658 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3659 for (i=0;i<pcbddc->n_local_subs;i++) { 3660 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3661 } 3662 pcbddc->n_local_subs = 0; 3663 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3664 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3665 pcbddc->graphanalyzed = PETSC_FALSE; 3666 pcbddc->recompute_topography = PETSC_TRUE; 3667 pcbddc->corner_selected = PETSC_FALSE; 3668 PetscFunctionReturn(0); 3669 } 3670 3671 PetscErrorCode PCBDDCResetSolvers(PC pc) 3672 { 3673 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3674 PetscErrorCode ierr; 3675 3676 PetscFunctionBegin; 3677 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3678 if (pcbddc->coarse_phi_B) { 3679 PetscScalar *array; 3680 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3681 ierr = PetscFree(array);CHKERRQ(ierr); 3682 } 3683 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3684 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3685 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3686 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3687 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3688 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3689 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3690 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3691 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3692 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3693 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3694 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3695 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3696 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3697 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3698 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3699 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3700 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3701 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3702 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3703 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3704 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3705 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3706 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3707 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3708 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3709 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3710 if (pcbddc->benign_zerodiag_subs) { 3711 PetscInt i; 3712 for (i=0;i<pcbddc->benign_n;i++) { 3713 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3714 } 3715 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3716 } 3717 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3718 PetscFunctionReturn(0); 3719 } 3720 3721 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3722 { 3723 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3724 PC_IS *pcis = (PC_IS*)pc->data; 3725 VecType impVecType; 3726 PetscInt n_constraints,n_R,old_size; 3727 PetscErrorCode ierr; 3728 3729 PetscFunctionBegin; 3730 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3731 n_R = pcis->n - pcbddc->n_vertices; 3732 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3733 /* local work vectors (try to avoid unneeded work)*/ 3734 /* R nodes */ 3735 old_size = -1; 3736 if (pcbddc->vec1_R) { 3737 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3738 } 3739 if (n_R != old_size) { 3740 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3741 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3742 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3743 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3744 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3745 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3746 } 3747 /* local primal dofs */ 3748 old_size = -1; 3749 if (pcbddc->vec1_P) { 3750 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3751 } 3752 if (pcbddc->local_primal_size != old_size) { 3753 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3754 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3755 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3756 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3757 } 3758 /* local explicit constraints */ 3759 old_size = -1; 3760 if (pcbddc->vec1_C) { 3761 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3762 } 3763 if (n_constraints && n_constraints != old_size) { 3764 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3765 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3766 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3767 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3768 } 3769 PetscFunctionReturn(0); 3770 } 3771 3772 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3773 { 3774 PetscErrorCode ierr; 3775 /* pointers to pcis and pcbddc */ 3776 PC_IS* pcis = (PC_IS*)pc->data; 3777 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3778 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3779 /* submatrices of local problem */ 3780 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3781 /* submatrices of local coarse problem */ 3782 Mat S_VV,S_CV,S_VC,S_CC; 3783 /* working matrices */ 3784 Mat C_CR; 3785 /* additional working stuff */ 3786 PC pc_R; 3787 Mat F,Brhs = NULL; 3788 Vec dummy_vec; 3789 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3790 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3791 PetscScalar *work; 3792 PetscInt *idx_V_B; 3793 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3794 PetscInt i,n_R,n_D,n_B; 3795 3796 /* some shortcuts to scalars */ 3797 PetscScalar one=1.0,m_one=-1.0; 3798 3799 PetscFunctionBegin; 3800 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3801 3802 /* Set Non-overlapping dimensions */ 3803 n_vertices = pcbddc->n_vertices; 3804 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3805 n_B = pcis->n_B; 3806 n_D = pcis->n - n_B; 3807 n_R = pcis->n - n_vertices; 3808 3809 /* vertices in boundary numbering */ 3810 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3811 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3812 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3813 3814 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3815 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3816 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3817 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3818 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3819 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3820 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3821 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3822 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3823 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3824 3825 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3826 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3827 ierr = PCSetUp(pc_R);CHKERRQ(ierr); 3828 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3829 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3830 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3831 lda_rhs = n_R; 3832 need_benign_correction = PETSC_FALSE; 3833 if (isLU || isILU || isCHOL) { 3834 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3835 } else if (sub_schurs && sub_schurs->reuse_solver) { 3836 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3837 MatFactorType type; 3838 3839 F = reuse_solver->F; 3840 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3841 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3842 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3843 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3844 } else { 3845 F = NULL; 3846 } 3847 3848 /* determine if we can use a sparse right-hand side */ 3849 sparserhs = PETSC_FALSE; 3850 if (F) { 3851 MatSolverType solver; 3852 3853 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3854 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3855 } 3856 3857 /* allocate workspace */ 3858 n = 0; 3859 if (n_constraints) { 3860 n += lda_rhs*n_constraints; 3861 } 3862 if (n_vertices) { 3863 n = PetscMax(2*lda_rhs*n_vertices,n); 3864 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3865 } 3866 if (!pcbddc->symmetric_primal) { 3867 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3868 } 3869 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3870 3871 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3872 dummy_vec = NULL; 3873 if (need_benign_correction && lda_rhs != n_R && F) { 3874 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3875 } 3876 3877 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3878 if (n_constraints) { 3879 Mat M3,C_B; 3880 IS is_aux; 3881 PetscScalar *array,*array2; 3882 3883 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3884 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3885 3886 /* Extract constraints on R nodes: C_{CR} */ 3887 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3888 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3889 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3890 3891 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3892 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3893 if (!sparserhs) { 3894 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3895 for (i=0;i<n_constraints;i++) { 3896 const PetscScalar *row_cmat_values; 3897 const PetscInt *row_cmat_indices; 3898 PetscInt size_of_constraint,j; 3899 3900 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3901 for (j=0;j<size_of_constraint;j++) { 3902 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3903 } 3904 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3905 } 3906 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3907 } else { 3908 Mat tC_CR; 3909 3910 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3911 if (lda_rhs != n_R) { 3912 PetscScalar *aa; 3913 PetscInt r,*ii,*jj; 3914 PetscBool done; 3915 3916 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3917 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3918 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3919 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3920 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3921 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3922 } else { 3923 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3924 tC_CR = C_CR; 3925 } 3926 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3927 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3928 } 3929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3930 if (F) { 3931 if (need_benign_correction) { 3932 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3933 3934 /* rhs is already zero on interior dofs, no need to change the rhs */ 3935 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3936 } 3937 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3938 if (need_benign_correction) { 3939 PetscScalar *marr; 3940 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3941 3942 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3943 if (lda_rhs != n_R) { 3944 for (i=0;i<n_constraints;i++) { 3945 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3946 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3947 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3948 } 3949 } else { 3950 for (i=0;i<n_constraints;i++) { 3951 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3952 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3953 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3954 } 3955 } 3956 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3957 } 3958 } else { 3959 PetscScalar *marr; 3960 3961 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3962 for (i=0;i<n_constraints;i++) { 3963 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3964 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3965 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3966 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3967 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3968 } 3969 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3970 } 3971 if (sparserhs) { 3972 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3973 } 3974 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3975 if (!pcbddc->switch_static) { 3976 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3977 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3978 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3979 for (i=0;i<n_constraints;i++) { 3980 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3981 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3982 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3983 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3984 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3985 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3986 } 3987 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3988 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3989 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3990 } else { 3991 if (lda_rhs != n_R) { 3992 IS dummy; 3993 3994 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3995 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3996 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3997 } else { 3998 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3999 pcbddc->local_auxmat2 = local_auxmat2_R; 4000 } 4001 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 4002 } 4003 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4004 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 4005 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 4006 if (isCHOL) { 4007 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 4008 } else { 4009 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 4010 } 4011 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 4012 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 4013 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 4014 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4015 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 4016 ierr = MatDestroy(&M3);CHKERRQ(ierr); 4017 } 4018 4019 /* Get submatrices from subdomain matrix */ 4020 if (n_vertices) { 4021 IS is_aux; 4022 PetscBool isseqaij; 4023 4024 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 4025 IS tis; 4026 4027 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 4028 ierr = ISSort(tis);CHKERRQ(ierr); 4029 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 4030 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4031 } else { 4032 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 4033 } 4034 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 4035 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 4036 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4037 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 4038 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4039 } 4040 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 4041 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 4042 } 4043 4044 /* Matrix of coarse basis functions (local) */ 4045 if (pcbddc->coarse_phi_B) { 4046 PetscInt on_B,on_primal,on_D=n_D; 4047 if (pcbddc->coarse_phi_D) { 4048 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 4049 } 4050 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 4051 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 4052 PetscScalar *marray; 4053 4054 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 4055 ierr = PetscFree(marray);CHKERRQ(ierr); 4056 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4057 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4058 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4059 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4060 } 4061 } 4062 4063 if (!pcbddc->coarse_phi_B) { 4064 PetscScalar *marr; 4065 4066 /* memory size */ 4067 n = n_B*pcbddc->local_primal_size; 4068 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 4069 if (!pcbddc->symmetric_primal) n *= 2; 4070 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 4071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 4072 marr += n_B*pcbddc->local_primal_size; 4073 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4074 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 4075 marr += n_D*pcbddc->local_primal_size; 4076 } 4077 if (!pcbddc->symmetric_primal) { 4078 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 4079 marr += n_B*pcbddc->local_primal_size; 4080 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4081 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 4082 } 4083 } else { 4084 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 4085 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 4086 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4087 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 4088 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 4089 } 4090 } 4091 } 4092 4093 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 4094 p0_lidx_I = NULL; 4095 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 4096 const PetscInt *idxs; 4097 4098 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4099 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 4100 for (i=0;i<pcbddc->benign_n;i++) { 4101 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 4102 } 4103 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 4104 } 4105 4106 /* vertices */ 4107 if (n_vertices) { 4108 PetscBool restoreavr = PETSC_FALSE; 4109 4110 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 4111 4112 if (n_R) { 4113 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 4114 PetscBLASInt B_N,B_one = 1; 4115 PetscScalar *x,*y; 4116 4117 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 4118 if (need_benign_correction) { 4119 ISLocalToGlobalMapping RtoN; 4120 IS is_p0; 4121 PetscInt *idxs_p0,n; 4122 4123 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 4124 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 4125 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 4126 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 4127 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 4128 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 4129 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 4130 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 4131 } 4132 4133 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4134 if (!sparserhs || need_benign_correction) { 4135 if (lda_rhs == n_R) { 4136 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4137 } else { 4138 PetscScalar *av,*array; 4139 const PetscInt *xadj,*adjncy; 4140 PetscInt n; 4141 PetscBool flg_row; 4142 4143 array = work+lda_rhs*n_vertices; 4144 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4145 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 4146 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4147 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 4148 for (i=0;i<n;i++) { 4149 PetscInt j; 4150 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 4151 } 4152 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4153 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4154 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 4155 } 4156 if (need_benign_correction) { 4157 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4158 PetscScalar *marr; 4159 4160 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 4161 /* need \Phi^T A_RV = (I+L)A_RV, L given by 4162 4163 | 0 0 0 | (V) 4164 L = | 0 0 -1 | (P-p0) 4165 | 0 0 -1 | (p0) 4166 4167 */ 4168 for (i=0;i<reuse_solver->benign_n;i++) { 4169 const PetscScalar *vals; 4170 const PetscInt *idxs,*idxs_zero; 4171 PetscInt n,j,nz; 4172 4173 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4174 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4175 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4176 for (j=0;j<n;j++) { 4177 PetscScalar val = vals[j]; 4178 PetscInt k,col = idxs[j]; 4179 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 4180 } 4181 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4182 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4183 } 4184 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 4185 } 4186 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 4187 Brhs = A_RV; 4188 } else { 4189 Mat tA_RVT,A_RVT; 4190 4191 if (!pcbddc->symmetric_primal) { 4192 /* A_RV already scaled by -1 */ 4193 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 4194 } else { 4195 restoreavr = PETSC_TRUE; 4196 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4197 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 4198 A_RVT = A_VR; 4199 } 4200 if (lda_rhs != n_R) { 4201 PetscScalar *aa; 4202 PetscInt r,*ii,*jj; 4203 PetscBool done; 4204 4205 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4206 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4207 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4208 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4209 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4210 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4211 } else { 4212 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4213 tA_RVT = A_RVT; 4214 } 4215 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4216 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4217 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4218 } 4219 if (F) { 4220 /* need to correct the rhs */ 4221 if (need_benign_correction) { 4222 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4223 PetscScalar *marr; 4224 4225 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4226 if (lda_rhs != n_R) { 4227 for (i=0;i<n_vertices;i++) { 4228 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4229 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4230 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4231 } 4232 } else { 4233 for (i=0;i<n_vertices;i++) { 4234 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4235 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4236 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4237 } 4238 } 4239 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4240 } 4241 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4242 if (restoreavr) { 4243 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4244 } 4245 /* need to correct the solution */ 4246 if (need_benign_correction) { 4247 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4248 PetscScalar *marr; 4249 4250 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4251 if (lda_rhs != n_R) { 4252 for (i=0;i<n_vertices;i++) { 4253 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4254 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4255 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4256 } 4257 } else { 4258 for (i=0;i<n_vertices;i++) { 4259 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4260 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4261 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4262 } 4263 } 4264 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4265 } 4266 } else { 4267 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4268 for (i=0;i<n_vertices;i++) { 4269 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4270 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4271 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4272 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4273 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4274 } 4275 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4276 } 4277 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4278 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4279 /* S_VV and S_CV */ 4280 if (n_constraints) { 4281 Mat B; 4282 4283 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4284 for (i=0;i<n_vertices;i++) { 4285 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4286 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4287 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4288 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4289 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4290 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4291 } 4292 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4293 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4294 ierr = MatDestroy(&B);CHKERRQ(ierr); 4295 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4296 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4297 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4298 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4299 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4300 ierr = MatDestroy(&B);CHKERRQ(ierr); 4301 } 4302 if (lda_rhs != n_R) { 4303 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4304 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4305 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4306 } 4307 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4308 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4309 if (need_benign_correction) { 4310 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4311 PetscScalar *marr,*sums; 4312 4313 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4314 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4315 for (i=0;i<reuse_solver->benign_n;i++) { 4316 const PetscScalar *vals; 4317 const PetscInt *idxs,*idxs_zero; 4318 PetscInt n,j,nz; 4319 4320 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4321 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4322 for (j=0;j<n_vertices;j++) { 4323 PetscInt k; 4324 sums[j] = 0.; 4325 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4326 } 4327 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4328 for (j=0;j<n;j++) { 4329 PetscScalar val = vals[j]; 4330 PetscInt k; 4331 for (k=0;k<n_vertices;k++) { 4332 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4333 } 4334 } 4335 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4336 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4337 } 4338 ierr = PetscFree(sums);CHKERRQ(ierr); 4339 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4340 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4341 } 4342 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4343 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4344 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4345 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4346 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4347 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4348 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4349 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4350 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4351 } else { 4352 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4353 } 4354 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4355 4356 /* coarse basis functions */ 4357 for (i=0;i<n_vertices;i++) { 4358 PetscScalar *y; 4359 4360 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4361 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4362 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4363 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4364 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4365 y[n_B*i+idx_V_B[i]] = 1.0; 4366 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4367 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4368 4369 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4370 PetscInt j; 4371 4372 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4373 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4374 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4375 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4376 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4377 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4378 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4379 } 4380 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4381 } 4382 /* if n_R == 0 the object is not destroyed */ 4383 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4384 } 4385 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4386 4387 if (n_constraints) { 4388 Mat B; 4389 4390 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4391 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4392 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4393 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4394 if (n_vertices) { 4395 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4396 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4397 } else { 4398 Mat S_VCt; 4399 4400 if (lda_rhs != n_R) { 4401 ierr = MatDestroy(&B);CHKERRQ(ierr); 4402 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4403 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4404 } 4405 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4406 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4407 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4408 } 4409 } 4410 ierr = MatDestroy(&B);CHKERRQ(ierr); 4411 /* coarse basis functions */ 4412 for (i=0;i<n_constraints;i++) { 4413 PetscScalar *y; 4414 4415 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4416 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4417 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4418 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4419 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4420 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4421 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4422 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4423 PetscInt j; 4424 4425 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4426 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4427 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4428 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4429 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4430 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4431 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4432 } 4433 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4434 } 4435 } 4436 if (n_constraints) { 4437 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4438 } 4439 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4440 4441 /* coarse matrix entries relative to B_0 */ 4442 if (pcbddc->benign_n) { 4443 Mat B0_B,B0_BPHI; 4444 IS is_dummy; 4445 PetscScalar *data; 4446 PetscInt j; 4447 4448 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4449 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4450 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4451 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4452 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4453 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4454 for (j=0;j<pcbddc->benign_n;j++) { 4455 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4456 for (i=0;i<pcbddc->local_primal_size;i++) { 4457 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4458 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4459 } 4460 } 4461 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4462 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4463 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4464 } 4465 4466 /* compute other basis functions for non-symmetric problems */ 4467 if (!pcbddc->symmetric_primal) { 4468 Mat B_V=NULL,B_C=NULL; 4469 PetscScalar *marray; 4470 4471 if (n_constraints) { 4472 Mat S_CCT,C_CRT; 4473 4474 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4475 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4476 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4477 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4478 if (n_vertices) { 4479 Mat S_VCT; 4480 4481 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4482 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4483 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4484 } 4485 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4486 } else { 4487 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4488 } 4489 if (n_vertices && n_R) { 4490 PetscScalar *av,*marray; 4491 const PetscInt *xadj,*adjncy; 4492 PetscInt n; 4493 PetscBool flg_row; 4494 4495 /* B_V = B_V - A_VR^T */ 4496 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4497 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4498 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4499 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4500 for (i=0;i<n;i++) { 4501 PetscInt j; 4502 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4503 } 4504 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4505 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4506 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4507 } 4508 4509 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4510 if (n_vertices) { 4511 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4512 for (i=0;i<n_vertices;i++) { 4513 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4514 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4515 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4516 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4517 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4518 } 4519 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4520 } 4521 if (B_C) { 4522 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4523 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4524 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4525 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4526 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4527 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4528 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4529 } 4530 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4531 } 4532 /* coarse basis functions */ 4533 for (i=0;i<pcbddc->local_primal_size;i++) { 4534 PetscScalar *y; 4535 4536 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4537 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4538 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4539 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4540 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4541 if (i<n_vertices) { 4542 y[n_B*i+idx_V_B[i]] = 1.0; 4543 } 4544 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4545 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4546 4547 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4548 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4549 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4550 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4551 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4552 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4553 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4554 } 4555 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4556 } 4557 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4558 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4559 } 4560 4561 /* free memory */ 4562 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4563 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4564 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4565 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4566 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4567 ierr = PetscFree(work);CHKERRQ(ierr); 4568 if (n_vertices) { 4569 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4570 } 4571 if (n_constraints) { 4572 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4573 } 4574 /* Checking coarse_sub_mat and coarse basis functios */ 4575 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4576 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4577 if (pcbddc->dbg_flag) { 4578 Mat coarse_sub_mat; 4579 Mat AUXMAT,TM1,TM2,TM3,TM4; 4580 Mat coarse_phi_D,coarse_phi_B; 4581 Mat coarse_psi_D,coarse_psi_B; 4582 Mat A_II,A_BB,A_IB,A_BI; 4583 Mat C_B,CPHI; 4584 IS is_dummy; 4585 Vec mones; 4586 MatType checkmattype=MATSEQAIJ; 4587 PetscReal real_value; 4588 4589 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4590 Mat A; 4591 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4592 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4593 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4594 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4595 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4596 ierr = MatDestroy(&A);CHKERRQ(ierr); 4597 } else { 4598 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4599 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4600 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4601 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4602 } 4603 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4604 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4605 if (!pcbddc->symmetric_primal) { 4606 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4607 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4608 } 4609 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4610 4611 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4612 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4613 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4614 if (!pcbddc->symmetric_primal) { 4615 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4616 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4617 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4618 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4619 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4620 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4621 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4622 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4623 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4624 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4625 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4626 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4627 } else { 4628 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4629 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4630 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4631 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4632 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4633 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4634 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4635 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4636 } 4637 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4638 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4639 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4640 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4641 if (pcbddc->benign_n) { 4642 Mat B0_B,B0_BPHI; 4643 PetscScalar *data,*data2; 4644 PetscInt j; 4645 4646 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4647 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4648 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4649 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4650 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4651 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4652 for (j=0;j<pcbddc->benign_n;j++) { 4653 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4654 for (i=0;i<pcbddc->local_primal_size;i++) { 4655 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4656 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4657 } 4658 } 4659 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4660 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4661 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4662 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4663 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4664 } 4665 #if 0 4666 { 4667 PetscViewer viewer; 4668 char filename[256]; 4669 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4670 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4671 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4672 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4673 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4674 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4675 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4676 if (pcbddc->coarse_phi_B) { 4677 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4678 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4679 } 4680 if (pcbddc->coarse_phi_D) { 4681 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4682 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4683 } 4684 if (pcbddc->coarse_psi_B) { 4685 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4686 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4687 } 4688 if (pcbddc->coarse_psi_D) { 4689 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4690 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4691 } 4692 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4693 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4694 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4695 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4696 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4697 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4698 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4699 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4700 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4701 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4702 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4703 } 4704 #endif 4705 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4706 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4707 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4708 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4709 4710 /* check constraints */ 4711 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4712 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4713 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4714 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4715 } else { 4716 PetscScalar *data; 4717 Mat tmat; 4718 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4719 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4720 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4721 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4722 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4723 } 4724 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4725 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4726 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4727 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4728 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4729 if (!pcbddc->symmetric_primal) { 4730 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4731 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4732 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4733 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4734 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4735 } 4736 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4737 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4738 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4739 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4740 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4741 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4742 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4743 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4744 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4745 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4746 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4747 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4748 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4749 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4750 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4751 if (!pcbddc->symmetric_primal) { 4752 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4753 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4754 } 4755 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4756 } 4757 /* get back data */ 4758 *coarse_submat_vals_n = coarse_submat_vals; 4759 PetscFunctionReturn(0); 4760 } 4761 4762 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4763 { 4764 Mat *work_mat; 4765 IS isrow_s,iscol_s; 4766 PetscBool rsorted,csorted; 4767 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4768 PetscErrorCode ierr; 4769 4770 PetscFunctionBegin; 4771 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4772 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4773 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4774 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4775 4776 if (!rsorted) { 4777 const PetscInt *idxs; 4778 PetscInt *idxs_sorted,i; 4779 4780 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4781 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4782 for (i=0;i<rsize;i++) { 4783 idxs_perm_r[i] = i; 4784 } 4785 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4786 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4787 for (i=0;i<rsize;i++) { 4788 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4789 } 4790 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4791 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4792 } else { 4793 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4794 isrow_s = isrow; 4795 } 4796 4797 if (!csorted) { 4798 if (isrow == iscol) { 4799 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4800 iscol_s = isrow_s; 4801 } else { 4802 const PetscInt *idxs; 4803 PetscInt *idxs_sorted,i; 4804 4805 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4806 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4807 for (i=0;i<csize;i++) { 4808 idxs_perm_c[i] = i; 4809 } 4810 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4811 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4812 for (i=0;i<csize;i++) { 4813 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4814 } 4815 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4816 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4817 } 4818 } else { 4819 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4820 iscol_s = iscol; 4821 } 4822 4823 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4824 4825 if (!rsorted || !csorted) { 4826 Mat new_mat; 4827 IS is_perm_r,is_perm_c; 4828 4829 if (!rsorted) { 4830 PetscInt *idxs_r,i; 4831 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4832 for (i=0;i<rsize;i++) { 4833 idxs_r[idxs_perm_r[i]] = i; 4834 } 4835 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4836 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4837 } else { 4838 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4839 } 4840 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4841 4842 if (!csorted) { 4843 if (isrow_s == iscol_s) { 4844 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4845 is_perm_c = is_perm_r; 4846 } else { 4847 PetscInt *idxs_c,i; 4848 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4849 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4850 for (i=0;i<csize;i++) { 4851 idxs_c[idxs_perm_c[i]] = i; 4852 } 4853 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4854 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4855 } 4856 } else { 4857 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4858 } 4859 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4860 4861 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4862 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4863 work_mat[0] = new_mat; 4864 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4865 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4866 } 4867 4868 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4869 *B = work_mat[0]; 4870 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4871 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4872 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4873 PetscFunctionReturn(0); 4874 } 4875 4876 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4877 { 4878 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4880 Mat new_mat,lA; 4881 IS is_local,is_global; 4882 PetscInt local_size; 4883 PetscBool isseqaij; 4884 PetscErrorCode ierr; 4885 4886 PetscFunctionBegin; 4887 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4888 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4889 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4890 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4891 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4892 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4893 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4894 4895 /* check */ 4896 if (pcbddc->dbg_flag) { 4897 Vec x,x_change; 4898 PetscReal error; 4899 4900 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4901 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4902 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4903 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4904 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4905 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4906 if (!pcbddc->change_interior) { 4907 const PetscScalar *x,*y,*v; 4908 PetscReal lerror = 0.; 4909 PetscInt i; 4910 4911 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4912 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4913 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4914 for (i=0;i<local_size;i++) 4915 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4916 lerror = PetscAbsScalar(x[i]-y[i]); 4917 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4918 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4919 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4920 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4921 if (error > PETSC_SMALL) { 4922 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4923 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4924 } else { 4925 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4926 } 4927 } 4928 } 4929 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4930 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4931 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4932 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4933 if (error > PETSC_SMALL) { 4934 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4935 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4936 } else { 4937 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4938 } 4939 } 4940 ierr = VecDestroy(&x);CHKERRQ(ierr); 4941 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4942 } 4943 4944 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4945 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4946 4947 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4948 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4949 if (isseqaij) { 4950 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4951 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4952 if (lA) { 4953 Mat work; 4954 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4955 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4956 ierr = MatDestroy(&work);CHKERRQ(ierr); 4957 } 4958 } else { 4959 Mat work_mat; 4960 4961 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4962 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4963 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4964 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4965 if (lA) { 4966 Mat work; 4967 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4968 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4969 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4970 ierr = MatDestroy(&work);CHKERRQ(ierr); 4971 } 4972 } 4973 if (matis->A->symmetric_set) { 4974 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4975 #if !defined(PETSC_USE_COMPLEX) 4976 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4977 #endif 4978 } 4979 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4980 PetscFunctionReturn(0); 4981 } 4982 4983 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4984 { 4985 PC_IS* pcis = (PC_IS*)(pc->data); 4986 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4987 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4988 PetscInt *idx_R_local=NULL; 4989 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4990 PetscInt vbs,bs; 4991 PetscBT bitmask=NULL; 4992 PetscErrorCode ierr; 4993 4994 PetscFunctionBegin; 4995 /* 4996 No need to setup local scatters if 4997 - primal space is unchanged 4998 AND 4999 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 5000 AND 5001 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 5002 */ 5003 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 5004 PetscFunctionReturn(0); 5005 } 5006 /* destroy old objects */ 5007 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 5008 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 5009 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 5010 /* Set Non-overlapping dimensions */ 5011 n_B = pcis->n_B; 5012 n_D = pcis->n - n_B; 5013 n_vertices = pcbddc->n_vertices; 5014 5015 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 5016 5017 /* create auxiliary bitmask and allocate workspace */ 5018 if (!sub_schurs || !sub_schurs->reuse_solver) { 5019 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 5020 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 5021 for (i=0;i<n_vertices;i++) { 5022 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 5023 } 5024 5025 for (i=0, n_R=0; i<pcis->n; i++) { 5026 if (!PetscBTLookup(bitmask,i)) { 5027 idx_R_local[n_R++] = i; 5028 } 5029 } 5030 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 5031 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5032 5033 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5034 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 5035 } 5036 5037 /* Block code */ 5038 vbs = 1; 5039 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 5040 if (bs>1 && !(n_vertices%bs)) { 5041 PetscBool is_blocked = PETSC_TRUE; 5042 PetscInt *vary; 5043 if (!sub_schurs || !sub_schurs->reuse_solver) { 5044 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 5045 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 5046 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 5047 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 5048 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 5049 for (i=0; i<pcis->n/bs; i++) { 5050 if (vary[i]!=0 && vary[i]!=bs) { 5051 is_blocked = PETSC_FALSE; 5052 break; 5053 } 5054 } 5055 ierr = PetscFree(vary);CHKERRQ(ierr); 5056 } else { 5057 /* Verify directly the R set */ 5058 for (i=0; i<n_R/bs; i++) { 5059 PetscInt j,node=idx_R_local[bs*i]; 5060 for (j=1; j<bs; j++) { 5061 if (node != idx_R_local[bs*i+j]-j) { 5062 is_blocked = PETSC_FALSE; 5063 break; 5064 } 5065 } 5066 } 5067 } 5068 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 5069 vbs = bs; 5070 for (i=0;i<n_R/vbs;i++) { 5071 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 5072 } 5073 } 5074 } 5075 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 5076 if (sub_schurs && sub_schurs->reuse_solver) { 5077 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5078 5079 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5080 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 5081 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 5082 reuse_solver->is_R = pcbddc->is_R_local; 5083 } else { 5084 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 5085 } 5086 5087 /* print some info if requested */ 5088 if (pcbddc->dbg_flag) { 5089 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5090 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5091 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5092 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 5093 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 5094 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 5095 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5096 } 5097 5098 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 5099 if (!sub_schurs || !sub_schurs->reuse_solver) { 5100 IS is_aux1,is_aux2; 5101 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 5102 5103 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5104 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 5105 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 5106 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5107 for (i=0; i<n_D; i++) { 5108 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 5109 } 5110 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5111 for (i=0, j=0; i<n_R; i++) { 5112 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 5113 aux_array1[j++] = i; 5114 } 5115 } 5116 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5117 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5118 for (i=0, j=0; i<n_B; i++) { 5119 if (!PetscBTLookup(bitmask,is_indices[i])) { 5120 aux_array2[j++] = i; 5121 } 5122 } 5123 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5124 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 5125 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 5126 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5127 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 5128 5129 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5130 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 5131 for (i=0, j=0; i<n_R; i++) { 5132 if (PetscBTLookup(bitmask,idx_R_local[i])) { 5133 aux_array1[j++] = i; 5134 } 5135 } 5136 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 5137 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5138 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 5139 } 5140 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 5141 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 5142 } else { 5143 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5144 IS tis; 5145 PetscInt schur_size; 5146 5147 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 5148 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 5149 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 5150 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5151 if (pcbddc->switch_static || pcbddc->dbg_flag) { 5152 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 5153 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 5154 ierr = ISDestroy(&tis);CHKERRQ(ierr); 5155 } 5156 } 5157 PetscFunctionReturn(0); 5158 } 5159 5160 5161 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 5162 { 5163 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 5164 PC_IS *pcis = (PC_IS*)pc->data; 5165 PC pc_temp; 5166 Mat A_RR; 5167 MatReuse reuse; 5168 PetscScalar m_one = -1.0; 5169 PetscReal value; 5170 PetscInt n_D,n_R; 5171 PetscBool check_corr,issbaij; 5172 PetscErrorCode ierr; 5173 /* prefixes stuff */ 5174 char dir_prefix[256],neu_prefix[256],str_level[16]; 5175 size_t len; 5176 5177 PetscFunctionBegin; 5178 5179 /* compute prefixes */ 5180 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 5181 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 5182 if (!pcbddc->current_level) { 5183 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5184 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 5185 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5186 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5187 } else { 5188 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 5189 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 5190 len -= 15; /* remove "pc_bddc_coarse_" */ 5191 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 5192 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 5193 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5194 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 5195 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 5196 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 5197 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 5198 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 5199 } 5200 5201 /* DIRICHLET PROBLEM */ 5202 if (dirichlet) { 5203 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5204 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 5205 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 5206 if (pcbddc->dbg_flag) { 5207 Mat A_IIn; 5208 5209 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5210 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5211 pcis->A_II = A_IIn; 5212 } 5213 } 5214 if (pcbddc->local_mat->symmetric_set) { 5215 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5216 } 5217 /* Matrix for Dirichlet problem is pcis->A_II */ 5218 n_D = pcis->n - pcis->n_B; 5219 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5220 void (*f)(void) = 0; 5221 5222 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5223 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5224 /* default */ 5225 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5226 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5227 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5228 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5229 if (issbaij) { 5230 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5231 } else { 5232 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5233 } 5234 /* Allow user's customization */ 5235 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5236 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5237 if (f && pcbddc->mat_graph->cloc) { 5238 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5239 const PetscInt *idxs; 5240 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5241 5242 ierr = ISGetLocalSize(pcis->is_I_local,&nl);CHKERRQ(ierr); 5243 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5244 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5245 for (i=0;i<nl;i++) { 5246 for (d=0;d<cdim;d++) { 5247 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5248 } 5249 } 5250 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 5251 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5252 ierr = PetscFree(scoords);CHKERRQ(ierr); 5253 } 5254 } 5255 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5256 if (sub_schurs && sub_schurs->reuse_solver) { 5257 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5258 5259 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5260 } 5261 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5262 if (!n_D) { 5263 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5264 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5265 } 5266 /* set ksp_D into pcis data */ 5267 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5268 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5269 pcis->ksp_D = pcbddc->ksp_D; 5270 } 5271 5272 /* NEUMANN PROBLEM */ 5273 A_RR = 0; 5274 if (neumann) { 5275 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5276 PetscInt ibs,mbs; 5277 PetscBool issbaij, reuse_neumann_solver; 5278 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5279 5280 reuse_neumann_solver = PETSC_FALSE; 5281 if (sub_schurs && sub_schurs->reuse_solver) { 5282 IS iP; 5283 5284 reuse_neumann_solver = PETSC_TRUE; 5285 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5286 if (iP) reuse_neumann_solver = PETSC_FALSE; 5287 } 5288 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5289 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5290 if (pcbddc->ksp_R) { /* already created ksp */ 5291 PetscInt nn_R; 5292 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5293 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5294 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5295 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5296 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5297 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5298 reuse = MAT_INITIAL_MATRIX; 5299 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5300 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5301 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5302 reuse = MAT_INITIAL_MATRIX; 5303 } else { /* safe to reuse the matrix */ 5304 reuse = MAT_REUSE_MATRIX; 5305 } 5306 } 5307 /* last check */ 5308 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5309 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5310 reuse = MAT_INITIAL_MATRIX; 5311 } 5312 } else { /* first time, so we need to create the matrix */ 5313 reuse = MAT_INITIAL_MATRIX; 5314 } 5315 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5316 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5317 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5318 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5319 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5320 if (matis->A == pcbddc->local_mat) { 5321 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5322 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5323 } else { 5324 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5325 } 5326 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5327 if (matis->A == pcbddc->local_mat) { 5328 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5329 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5330 } else { 5331 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5332 } 5333 } 5334 /* extract A_RR */ 5335 if (reuse_neumann_solver) { 5336 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5337 5338 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5339 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5340 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5341 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5342 } else { 5343 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5344 } 5345 } else { 5346 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5347 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5348 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5349 } 5350 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5351 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5352 } 5353 if (pcbddc->local_mat->symmetric_set) { 5354 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5355 } 5356 if (!pcbddc->ksp_R) { /* create object if not present */ 5357 void (*f)(void) = 0; 5358 5359 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5360 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5361 /* default */ 5362 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5363 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5364 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5365 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5366 if (issbaij) { 5367 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5368 } else { 5369 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5370 } 5371 /* Allow user's customization */ 5372 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5373 ierr = PetscObjectQueryFunction((PetscObject)pc_temp,"PCSetCoordinates_C",&f);CHKERRQ(ierr); 5374 if (f && pcbddc->mat_graph->cloc) { 5375 PetscReal *coords = pcbddc->mat_graph->coords,*scoords; 5376 const PetscInt *idxs; 5377 PetscInt cdim = pcbddc->mat_graph->cdim,nl,i,d; 5378 5379 ierr = ISGetLocalSize(pcbddc->is_R_local,&nl);CHKERRQ(ierr); 5380 ierr = ISGetIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5381 ierr = PetscMalloc1(nl*cdim,&scoords);CHKERRQ(ierr); 5382 for (i=0;i<nl;i++) { 5383 for (d=0;d<cdim;d++) { 5384 scoords[i*cdim+d] = coords[idxs[i]*cdim+d]; 5385 } 5386 } 5387 ierr = ISRestoreIndices(pcbddc->is_R_local,&idxs);CHKERRQ(ierr); 5388 ierr = PCSetCoordinates(pc_temp,cdim,nl,scoords);CHKERRQ(ierr); 5389 ierr = PetscFree(scoords);CHKERRQ(ierr); 5390 } 5391 } 5392 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5393 if (!n_R) { 5394 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5395 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5396 } 5397 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5398 /* Reuse solver if it is present */ 5399 if (reuse_neumann_solver) { 5400 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5401 5402 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5403 } 5404 } 5405 5406 if (pcbddc->dbg_flag) { 5407 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5408 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5409 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5410 } 5411 5412 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5413 check_corr = PETSC_FALSE; 5414 if (pcbddc->NullSpace_corr[0]) { 5415 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5416 } 5417 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5418 check_corr = PETSC_TRUE; 5419 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5420 } 5421 if (neumann && pcbddc->NullSpace_corr[2]) { 5422 check_corr = PETSC_TRUE; 5423 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5424 } 5425 /* check Dirichlet and Neumann solvers */ 5426 if (pcbddc->dbg_flag) { 5427 if (dirichlet) { /* Dirichlet */ 5428 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5429 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5430 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5431 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5432 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5433 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5434 if (check_corr) { 5435 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5436 } 5437 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5438 } 5439 if (neumann) { /* Neumann */ 5440 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5441 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5442 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5443 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5444 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5445 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5446 if (check_corr) { 5447 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5448 } 5449 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5450 } 5451 } 5452 /* free Neumann problem's matrix */ 5453 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5454 PetscFunctionReturn(0); 5455 } 5456 5457 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5458 { 5459 PetscErrorCode ierr; 5460 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5461 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5462 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5463 5464 PetscFunctionBegin; 5465 if (!reuse_solver) { 5466 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5467 } 5468 if (!pcbddc->switch_static) { 5469 if (applytranspose && pcbddc->local_auxmat1) { 5470 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5471 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5472 } 5473 if (!reuse_solver) { 5474 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5475 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5476 } else { 5477 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5478 5479 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5480 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5481 } 5482 } else { 5483 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5484 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5485 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5486 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5487 if (applytranspose && pcbddc->local_auxmat1) { 5488 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5489 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5490 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5491 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5492 } 5493 } 5494 if (!reuse_solver || pcbddc->switch_static) { 5495 if (applytranspose) { 5496 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5497 } else { 5498 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5499 } 5500 } else { 5501 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5502 5503 if (applytranspose) { 5504 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5505 } else { 5506 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5507 } 5508 } 5509 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5510 if (!pcbddc->switch_static) { 5511 if (!reuse_solver) { 5512 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5513 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5514 } else { 5515 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5516 5517 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5518 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5519 } 5520 if (!applytranspose && pcbddc->local_auxmat1) { 5521 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5522 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5523 } 5524 } else { 5525 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5526 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5527 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5528 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5529 if (!applytranspose && pcbddc->local_auxmat1) { 5530 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5531 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5532 } 5533 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5534 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5535 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5536 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5537 } 5538 PetscFunctionReturn(0); 5539 } 5540 5541 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5542 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5543 { 5544 PetscErrorCode ierr; 5545 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5546 PC_IS* pcis = (PC_IS*) (pc->data); 5547 const PetscScalar zero = 0.0; 5548 5549 PetscFunctionBegin; 5550 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5551 if (!pcbddc->benign_apply_coarse_only) { 5552 if (applytranspose) { 5553 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5554 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5555 } else { 5556 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5557 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5558 } 5559 } else { 5560 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5561 } 5562 5563 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5564 if (pcbddc->benign_n) { 5565 PetscScalar *array; 5566 PetscInt j; 5567 5568 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5569 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5570 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5571 } 5572 5573 /* start communications from local primal nodes to rhs of coarse solver */ 5574 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5575 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5576 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5577 5578 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5579 if (pcbddc->coarse_ksp) { 5580 Mat coarse_mat; 5581 Vec rhs,sol; 5582 MatNullSpace nullsp; 5583 PetscBool isbddc = PETSC_FALSE; 5584 5585 if (pcbddc->benign_have_null) { 5586 PC coarse_pc; 5587 5588 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5589 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5590 /* we need to propagate to coarser levels the need for a possible benign correction */ 5591 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5592 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5593 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5594 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5595 } 5596 } 5597 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5598 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5599 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5600 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5601 if (nullsp) { 5602 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5603 } 5604 if (applytranspose) { 5605 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5606 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5607 } else { 5608 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5609 PC coarse_pc; 5610 5611 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5612 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5613 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5614 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5615 } else { 5616 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5617 } 5618 } 5619 /* we don't need the benign correction at coarser levels anymore */ 5620 if (pcbddc->benign_have_null && isbddc) { 5621 PC coarse_pc; 5622 PC_BDDC* coarsepcbddc; 5623 5624 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5625 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5626 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5627 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5628 } 5629 if (nullsp) { 5630 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5631 } 5632 } 5633 5634 /* Local solution on R nodes */ 5635 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5636 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5637 } 5638 /* communications from coarse sol to local primal nodes */ 5639 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5640 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5641 5642 /* Sum contributions from the two levels */ 5643 if (!pcbddc->benign_apply_coarse_only) { 5644 if (applytranspose) { 5645 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5646 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5647 } else { 5648 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5649 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5650 } 5651 /* store p0 */ 5652 if (pcbddc->benign_n) { 5653 PetscScalar *array; 5654 PetscInt j; 5655 5656 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5657 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5658 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5659 } 5660 } else { /* expand the coarse solution */ 5661 if (applytranspose) { 5662 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5663 } else { 5664 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5665 } 5666 } 5667 PetscFunctionReturn(0); 5668 } 5669 5670 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5671 { 5672 PetscErrorCode ierr; 5673 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5674 PetscScalar *array; 5675 Vec from,to; 5676 5677 PetscFunctionBegin; 5678 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5679 from = pcbddc->coarse_vec; 5680 to = pcbddc->vec1_P; 5681 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5682 Vec tvec; 5683 5684 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5685 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5686 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5687 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5688 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5689 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5690 } 5691 } else { /* from local to global -> put data in coarse right hand side */ 5692 from = pcbddc->vec1_P; 5693 to = pcbddc->coarse_vec; 5694 } 5695 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5696 PetscFunctionReturn(0); 5697 } 5698 5699 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5700 { 5701 PetscErrorCode ierr; 5702 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5703 PetscScalar *array; 5704 Vec from,to; 5705 5706 PetscFunctionBegin; 5707 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5708 from = pcbddc->coarse_vec; 5709 to = pcbddc->vec1_P; 5710 } else { /* from local to global -> put data in coarse right hand side */ 5711 from = pcbddc->vec1_P; 5712 to = pcbddc->coarse_vec; 5713 } 5714 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5715 if (smode == SCATTER_FORWARD) { 5716 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5717 Vec tvec; 5718 5719 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5720 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5721 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5722 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5723 } 5724 } else { 5725 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5726 ierr = VecResetArray(from);CHKERRQ(ierr); 5727 } 5728 } 5729 PetscFunctionReturn(0); 5730 } 5731 5732 /* uncomment for testing purposes */ 5733 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5734 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5735 { 5736 PetscErrorCode ierr; 5737 PC_IS* pcis = (PC_IS*)(pc->data); 5738 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5739 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5740 /* one and zero */ 5741 PetscScalar one=1.0,zero=0.0; 5742 /* space to store constraints and their local indices */ 5743 PetscScalar *constraints_data; 5744 PetscInt *constraints_idxs,*constraints_idxs_B; 5745 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5746 PetscInt *constraints_n; 5747 /* iterators */ 5748 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5749 /* BLAS integers */ 5750 PetscBLASInt lwork,lierr; 5751 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5752 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5753 /* reuse */ 5754 PetscInt olocal_primal_size,olocal_primal_size_cc; 5755 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5756 /* change of basis */ 5757 PetscBool qr_needed; 5758 PetscBT change_basis,qr_needed_idx; 5759 /* auxiliary stuff */ 5760 PetscInt *nnz,*is_indices; 5761 PetscInt ncc; 5762 /* some quantities */ 5763 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5764 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5765 PetscReal tol; /* tolerance for retaining eigenmodes */ 5766 5767 PetscFunctionBegin; 5768 tol = PetscSqrtReal(PETSC_SMALL); 5769 /* Destroy Mat objects computed previously */ 5770 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5771 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5772 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5773 /* save info on constraints from previous setup (if any) */ 5774 olocal_primal_size = pcbddc->local_primal_size; 5775 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5776 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5777 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5778 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5779 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5780 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5781 5782 if (!pcbddc->adaptive_selection) { 5783 IS ISForVertices,*ISForFaces,*ISForEdges; 5784 MatNullSpace nearnullsp; 5785 const Vec *nearnullvecs; 5786 Vec *localnearnullsp; 5787 PetscScalar *array; 5788 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5789 PetscBool nnsp_has_cnst; 5790 /* LAPACK working arrays for SVD or POD */ 5791 PetscBool skip_lapack,boolforchange; 5792 PetscScalar *work; 5793 PetscReal *singular_vals; 5794 #if defined(PETSC_USE_COMPLEX) 5795 PetscReal *rwork; 5796 #endif 5797 #if defined(PETSC_MISSING_LAPACK_GESVD) 5798 PetscScalar *temp_basis,*correlation_mat; 5799 #else 5800 PetscBLASInt dummy_int=1; 5801 PetscScalar dummy_scalar=1.; 5802 #endif 5803 5804 /* Get index sets for faces, edges and vertices from graph */ 5805 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5806 /* print some info */ 5807 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5808 PetscInt nv; 5809 5810 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5811 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5812 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5813 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5814 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5815 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5816 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5817 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5818 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5819 } 5820 5821 /* free unneeded index sets */ 5822 if (!pcbddc->use_vertices) { 5823 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5824 } 5825 if (!pcbddc->use_edges) { 5826 for (i=0;i<n_ISForEdges;i++) { 5827 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5828 } 5829 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5830 n_ISForEdges = 0; 5831 } 5832 if (!pcbddc->use_faces) { 5833 for (i=0;i<n_ISForFaces;i++) { 5834 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5835 } 5836 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5837 n_ISForFaces = 0; 5838 } 5839 5840 /* check if near null space is attached to global mat */ 5841 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5842 if (nearnullsp) { 5843 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5844 /* remove any stored info */ 5845 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5846 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5847 /* store information for BDDC solver reuse */ 5848 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5849 pcbddc->onearnullspace = nearnullsp; 5850 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5851 for (i=0;i<nnsp_size;i++) { 5852 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5853 } 5854 } else { /* if near null space is not provided BDDC uses constants by default */ 5855 nnsp_size = 0; 5856 nnsp_has_cnst = PETSC_TRUE; 5857 } 5858 /* get max number of constraints on a single cc */ 5859 max_constraints = nnsp_size; 5860 if (nnsp_has_cnst) max_constraints++; 5861 5862 /* 5863 Evaluate maximum storage size needed by the procedure 5864 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5865 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5866 There can be multiple constraints per connected component 5867 */ 5868 n_vertices = 0; 5869 if (ISForVertices) { 5870 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5871 } 5872 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5873 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5874 5875 total_counts = n_ISForFaces+n_ISForEdges; 5876 total_counts *= max_constraints; 5877 total_counts += n_vertices; 5878 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5879 5880 total_counts = 0; 5881 max_size_of_constraint = 0; 5882 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5883 IS used_is; 5884 if (i<n_ISForEdges) { 5885 used_is = ISForEdges[i]; 5886 } else { 5887 used_is = ISForFaces[i-n_ISForEdges]; 5888 } 5889 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5890 total_counts += j; 5891 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5892 } 5893 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5894 5895 /* get local part of global near null space vectors */ 5896 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5897 for (k=0;k<nnsp_size;k++) { 5898 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5899 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5900 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5901 } 5902 5903 /* whether or not to skip lapack calls */ 5904 skip_lapack = PETSC_TRUE; 5905 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5906 5907 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5908 if (!skip_lapack) { 5909 PetscScalar temp_work; 5910 5911 #if defined(PETSC_MISSING_LAPACK_GESVD) 5912 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5913 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5914 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5915 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5916 #if defined(PETSC_USE_COMPLEX) 5917 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5918 #endif 5919 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5920 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5921 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5922 lwork = -1; 5923 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5924 #if !defined(PETSC_USE_COMPLEX) 5925 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5926 #else 5927 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5928 #endif 5929 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5930 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5931 #else /* on missing GESVD */ 5932 /* SVD */ 5933 PetscInt max_n,min_n; 5934 max_n = max_size_of_constraint; 5935 min_n = max_constraints; 5936 if (max_size_of_constraint < max_constraints) { 5937 min_n = max_size_of_constraint; 5938 max_n = max_constraints; 5939 } 5940 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5941 #if defined(PETSC_USE_COMPLEX) 5942 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5943 #endif 5944 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5945 lwork = -1; 5946 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5947 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5948 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5949 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5950 #if !defined(PETSC_USE_COMPLEX) 5951 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5952 #else 5953 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5954 #endif 5955 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5956 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5957 #endif /* on missing GESVD */ 5958 /* Allocate optimal workspace */ 5959 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5960 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5961 } 5962 /* Now we can loop on constraining sets */ 5963 total_counts = 0; 5964 constraints_idxs_ptr[0] = 0; 5965 constraints_data_ptr[0] = 0; 5966 /* vertices */ 5967 if (n_vertices) { 5968 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5969 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5970 for (i=0;i<n_vertices;i++) { 5971 constraints_n[total_counts] = 1; 5972 constraints_data[total_counts] = 1.0; 5973 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5974 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5975 total_counts++; 5976 } 5977 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5978 n_vertices = total_counts; 5979 } 5980 5981 /* edges and faces */ 5982 total_counts_cc = total_counts; 5983 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5984 IS used_is; 5985 PetscBool idxs_copied = PETSC_FALSE; 5986 5987 if (ncc<n_ISForEdges) { 5988 used_is = ISForEdges[ncc]; 5989 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5990 } else { 5991 used_is = ISForFaces[ncc-n_ISForEdges]; 5992 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5993 } 5994 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5995 5996 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5997 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5998 /* change of basis should not be performed on local periodic nodes */ 5999 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 6000 if (nnsp_has_cnst) { 6001 PetscScalar quad_value; 6002 6003 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6004 idxs_copied = PETSC_TRUE; 6005 6006 if (!pcbddc->use_nnsp_true) { 6007 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 6008 } else { 6009 quad_value = 1.0; 6010 } 6011 for (j=0;j<size_of_constraint;j++) { 6012 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 6013 } 6014 temp_constraints++; 6015 total_counts++; 6016 } 6017 for (k=0;k<nnsp_size;k++) { 6018 PetscReal real_value; 6019 PetscScalar *ptr_to_data; 6020 6021 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6022 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 6023 for (j=0;j<size_of_constraint;j++) { 6024 ptr_to_data[j] = array[is_indices[j]]; 6025 } 6026 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 6027 /* check if array is null on the connected component */ 6028 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6029 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 6030 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 6031 temp_constraints++; 6032 total_counts++; 6033 if (!idxs_copied) { 6034 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 6035 idxs_copied = PETSC_TRUE; 6036 } 6037 } 6038 } 6039 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6040 valid_constraints = temp_constraints; 6041 if (!pcbddc->use_nnsp_true && temp_constraints) { 6042 if (temp_constraints == 1) { /* just normalize the constraint */ 6043 PetscScalar norm,*ptr_to_data; 6044 6045 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6046 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6047 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 6048 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 6049 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 6050 } else { /* perform SVD */ 6051 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 6052 6053 #if defined(PETSC_MISSING_LAPACK_GESVD) 6054 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 6055 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 6056 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 6057 the constraints basis will differ (by a complex factor with absolute value equal to 1) 6058 from that computed using LAPACKgesvd 6059 -> This is due to a different computation of eigenvectors in LAPACKheev 6060 -> The quality of the POD-computed basis will be the same */ 6061 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 6062 /* Store upper triangular part of correlation matrix */ 6063 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6064 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6065 for (j=0;j<temp_constraints;j++) { 6066 for (k=0;k<j+1;k++) { 6067 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 6068 } 6069 } 6070 /* compute eigenvalues and eigenvectors of correlation matrix */ 6071 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6072 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 6073 #if !defined(PETSC_USE_COMPLEX) 6074 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 6075 #else 6076 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 6077 #endif 6078 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6079 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 6080 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 6081 j = 0; 6082 while (j < temp_constraints && singular_vals[j] < tol) j++; 6083 total_counts = total_counts-j; 6084 valid_constraints = temp_constraints-j; 6085 /* scale and copy POD basis into used quadrature memory */ 6086 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6087 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6088 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 6089 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6090 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 6091 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6092 if (j<temp_constraints) { 6093 PetscInt ii; 6094 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 6095 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6096 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 6097 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6098 for (k=0;k<temp_constraints-j;k++) { 6099 for (ii=0;ii<size_of_constraint;ii++) { 6100 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 6101 } 6102 } 6103 } 6104 #else /* on missing GESVD */ 6105 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6106 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 6107 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6108 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6109 #if !defined(PETSC_USE_COMPLEX) 6110 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 6111 #else 6112 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 6113 #endif 6114 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 6115 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6116 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 6117 k = temp_constraints; 6118 if (k > size_of_constraint) k = size_of_constraint; 6119 j = 0; 6120 while (j < k && singular_vals[k-j-1] < tol) j++; 6121 valid_constraints = k-j; 6122 total_counts = total_counts-temp_constraints+valid_constraints; 6123 #endif /* on missing GESVD */ 6124 } 6125 } 6126 /* update pointers information */ 6127 if (valid_constraints) { 6128 constraints_n[total_counts_cc] = valid_constraints; 6129 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 6130 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 6131 /* set change_of_basis flag */ 6132 if (boolforchange) { 6133 PetscBTSet(change_basis,total_counts_cc); 6134 } 6135 total_counts_cc++; 6136 } 6137 } 6138 /* free workspace */ 6139 if (!skip_lapack) { 6140 ierr = PetscFree(work);CHKERRQ(ierr); 6141 #if defined(PETSC_USE_COMPLEX) 6142 ierr = PetscFree(rwork);CHKERRQ(ierr); 6143 #endif 6144 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 6145 #if defined(PETSC_MISSING_LAPACK_GESVD) 6146 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 6147 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 6148 #endif 6149 } 6150 for (k=0;k<nnsp_size;k++) { 6151 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 6152 } 6153 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 6154 /* free index sets of faces, edges and vertices */ 6155 for (i=0;i<n_ISForFaces;i++) { 6156 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 6157 } 6158 if (n_ISForFaces) { 6159 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 6160 } 6161 for (i=0;i<n_ISForEdges;i++) { 6162 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 6163 } 6164 if (n_ISForEdges) { 6165 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 6166 } 6167 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 6168 } else { 6169 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 6170 6171 total_counts = 0; 6172 n_vertices = 0; 6173 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 6174 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 6175 } 6176 max_constraints = 0; 6177 total_counts_cc = 0; 6178 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6179 total_counts += pcbddc->adaptive_constraints_n[i]; 6180 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 6181 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 6182 } 6183 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 6184 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 6185 constraints_idxs = pcbddc->adaptive_constraints_idxs; 6186 constraints_data = pcbddc->adaptive_constraints_data; 6187 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 6188 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 6189 total_counts_cc = 0; 6190 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 6191 if (pcbddc->adaptive_constraints_n[i]) { 6192 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 6193 } 6194 } 6195 #if 0 6196 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 6197 for (i=0;i<total_counts_cc;i++) { 6198 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 6199 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 6200 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 6201 printf(" %d",constraints_idxs[j]); 6202 } 6203 printf("\n"); 6204 printf("number of cc: %d\n",constraints_n[i]); 6205 } 6206 for (i=0;i<n_vertices;i++) { 6207 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 6208 } 6209 for (i=0;i<sub_schurs->n_subs;i++) { 6210 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 6211 } 6212 #endif 6213 6214 max_size_of_constraint = 0; 6215 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 6216 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 6217 /* Change of basis */ 6218 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 6219 if (pcbddc->use_change_of_basis) { 6220 for (i=0;i<sub_schurs->n_subs;i++) { 6221 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 6222 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 6223 } 6224 } 6225 } 6226 } 6227 pcbddc->local_primal_size = total_counts; 6228 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6229 6230 /* map constraints_idxs in boundary numbering */ 6231 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6232 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6233 6234 /* Create constraint matrix */ 6235 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6236 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6237 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6238 6239 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6240 /* determine if a QR strategy is needed for change of basis */ 6241 qr_needed = PETSC_FALSE; 6242 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6243 total_primal_vertices=0; 6244 pcbddc->local_primal_size_cc = 0; 6245 for (i=0;i<total_counts_cc;i++) { 6246 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6247 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6248 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6249 pcbddc->local_primal_size_cc += 1; 6250 } else if (PetscBTLookup(change_basis,i)) { 6251 for (k=0;k<constraints_n[i];k++) { 6252 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6253 } 6254 pcbddc->local_primal_size_cc += constraints_n[i]; 6255 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6256 PetscBTSet(qr_needed_idx,i); 6257 qr_needed = PETSC_TRUE; 6258 } 6259 } else { 6260 pcbddc->local_primal_size_cc += 1; 6261 } 6262 } 6263 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6264 pcbddc->n_vertices = total_primal_vertices; 6265 /* permute indices in order to have a sorted set of vertices */ 6266 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6267 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6268 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6269 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6270 6271 /* nonzero structure of constraint matrix */ 6272 /* and get reference dof for local constraints */ 6273 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6274 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6275 6276 j = total_primal_vertices; 6277 total_counts = total_primal_vertices; 6278 cum = total_primal_vertices; 6279 for (i=n_vertices;i<total_counts_cc;i++) { 6280 if (!PetscBTLookup(change_basis,i)) { 6281 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6282 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6283 cum++; 6284 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6285 for (k=0;k<constraints_n[i];k++) { 6286 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6287 nnz[j+k] = size_of_constraint; 6288 } 6289 j += constraints_n[i]; 6290 } 6291 } 6292 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6293 ierr = PetscFree(nnz);CHKERRQ(ierr); 6294 6295 /* set values in constraint matrix */ 6296 for (i=0;i<total_primal_vertices;i++) { 6297 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6298 } 6299 total_counts = total_primal_vertices; 6300 for (i=n_vertices;i<total_counts_cc;i++) { 6301 if (!PetscBTLookup(change_basis,i)) { 6302 PetscInt *cols; 6303 6304 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6305 cols = constraints_idxs+constraints_idxs_ptr[i]; 6306 for (k=0;k<constraints_n[i];k++) { 6307 PetscInt row = total_counts+k; 6308 PetscScalar *vals; 6309 6310 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6311 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6312 } 6313 total_counts += constraints_n[i]; 6314 } 6315 } 6316 /* assembling */ 6317 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6318 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6319 ierr = MatChop(pcbddc->ConstraintMatrix,PETSC_SMALL);CHKERRQ(ierr); 6320 ierr = MatSeqAIJCompress(pcbddc->ConstraintMatrix,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6321 ierr = MatViewFromOptions(pcbddc->ConstraintMatrix,NULL,"-pc_bddc_constraint_mat_view");CHKERRQ(ierr); 6322 6323 /* 6324 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6325 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6326 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6327 */ 6328 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6329 if (pcbddc->use_change_of_basis) { 6330 /* dual and primal dofs on a single cc */ 6331 PetscInt dual_dofs,primal_dofs; 6332 /* working stuff for GEQRF */ 6333 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6334 PetscBLASInt lqr_work; 6335 /* working stuff for UNGQR */ 6336 PetscScalar *gqr_work,lgqr_work_t; 6337 PetscBLASInt lgqr_work; 6338 /* working stuff for TRTRS */ 6339 PetscScalar *trs_rhs; 6340 PetscBLASInt Blas_NRHS; 6341 /* pointers for values insertion into change of basis matrix */ 6342 PetscInt *start_rows,*start_cols; 6343 PetscScalar *start_vals; 6344 /* working stuff for values insertion */ 6345 PetscBT is_primal; 6346 PetscInt *aux_primal_numbering_B; 6347 /* matrix sizes */ 6348 PetscInt global_size,local_size; 6349 /* temporary change of basis */ 6350 Mat localChangeOfBasisMatrix; 6351 /* extra space for debugging */ 6352 PetscScalar *dbg_work; 6353 6354 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6355 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6356 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6357 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6358 /* nonzeros for local mat */ 6359 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6360 if (!pcbddc->benign_change || pcbddc->fake_change) { 6361 for (i=0;i<pcis->n;i++) nnz[i]=1; 6362 } else { 6363 const PetscInt *ii; 6364 PetscInt n; 6365 PetscBool flg_row; 6366 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6367 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6368 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6369 } 6370 for (i=n_vertices;i<total_counts_cc;i++) { 6371 if (PetscBTLookup(change_basis,i)) { 6372 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6373 if (PetscBTLookup(qr_needed_idx,i)) { 6374 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6375 } else { 6376 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6377 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6378 } 6379 } 6380 } 6381 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6382 ierr = PetscFree(nnz);CHKERRQ(ierr); 6383 /* Set interior change in the matrix */ 6384 if (!pcbddc->benign_change || pcbddc->fake_change) { 6385 for (i=0;i<pcis->n;i++) { 6386 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6387 } 6388 } else { 6389 const PetscInt *ii,*jj; 6390 PetscScalar *aa; 6391 PetscInt n; 6392 PetscBool flg_row; 6393 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6394 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6395 for (i=0;i<n;i++) { 6396 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6397 } 6398 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6399 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6400 } 6401 6402 if (pcbddc->dbg_flag) { 6403 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6404 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6405 } 6406 6407 6408 /* Now we loop on the constraints which need a change of basis */ 6409 /* 6410 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6411 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6412 6413 Basic blocks of change of basis matrix T computed by 6414 6415 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6416 6417 | 1 0 ... 0 s_1/S | 6418 | 0 1 ... 0 s_2/S | 6419 | ... | 6420 | 0 ... 1 s_{n-1}/S | 6421 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6422 6423 with S = \sum_{i=1}^n s_i^2 6424 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6425 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6426 6427 - QR decomposition of constraints otherwise 6428 */ 6429 if (qr_needed) { 6430 /* space to store Q */ 6431 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6432 /* array to store scaling factors for reflectors */ 6433 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6434 /* first we issue queries for optimal work */ 6435 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6436 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6437 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6438 lqr_work = -1; 6439 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6440 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6441 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6442 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6443 lgqr_work = -1; 6444 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6445 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6446 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6447 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6448 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6449 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6450 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6451 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6452 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6453 /* array to store rhs and solution of triangular solver */ 6454 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6455 /* allocating workspace for check */ 6456 if (pcbddc->dbg_flag) { 6457 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6458 } 6459 } 6460 /* array to store whether a node is primal or not */ 6461 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6462 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6463 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6464 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6465 for (i=0;i<total_primal_vertices;i++) { 6466 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6467 } 6468 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6469 6470 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6471 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6472 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6473 if (PetscBTLookup(change_basis,total_counts)) { 6474 /* get constraint info */ 6475 primal_dofs = constraints_n[total_counts]; 6476 dual_dofs = size_of_constraint-primal_dofs; 6477 6478 if (pcbddc->dbg_flag) { 6479 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6480 } 6481 6482 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6483 6484 /* copy quadrature constraints for change of basis check */ 6485 if (pcbddc->dbg_flag) { 6486 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6487 } 6488 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6489 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6490 6491 /* compute QR decomposition of constraints */ 6492 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6493 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6494 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6495 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6496 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6497 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6498 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6499 6500 /* explictly compute R^-T */ 6501 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6502 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6503 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6504 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6505 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6506 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6507 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6508 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6509 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6510 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6511 6512 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6513 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6514 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6515 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6516 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6517 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6518 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6519 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6520 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6521 6522 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6523 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6524 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6525 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6526 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6527 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6528 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6529 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6530 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6531 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6532 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6533 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6534 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6535 6536 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6537 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6538 /* insert cols for primal dofs */ 6539 for (j=0;j<primal_dofs;j++) { 6540 start_vals = &qr_basis[j*size_of_constraint]; 6541 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6542 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6543 } 6544 /* insert cols for dual dofs */ 6545 for (j=0,k=0;j<dual_dofs;k++) { 6546 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6547 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6548 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6549 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6550 j++; 6551 } 6552 } 6553 6554 /* check change of basis */ 6555 if (pcbddc->dbg_flag) { 6556 PetscInt ii,jj; 6557 PetscBool valid_qr=PETSC_TRUE; 6558 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6559 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6560 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6561 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6562 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6563 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6564 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6565 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6566 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6567 for (jj=0;jj<size_of_constraint;jj++) { 6568 for (ii=0;ii<primal_dofs;ii++) { 6569 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6570 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6571 } 6572 } 6573 if (!valid_qr) { 6574 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6575 for (jj=0;jj<size_of_constraint;jj++) { 6576 for (ii=0;ii<primal_dofs;ii++) { 6577 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6578 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6579 } 6580 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6581 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6582 } 6583 } 6584 } 6585 } else { 6586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6587 } 6588 } 6589 } else { /* simple transformation block */ 6590 PetscInt row,col; 6591 PetscScalar val,norm; 6592 6593 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6594 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6595 for (j=0;j<size_of_constraint;j++) { 6596 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6597 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6598 if (!PetscBTLookup(is_primal,row_B)) { 6599 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6600 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6601 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6602 } else { 6603 for (k=0;k<size_of_constraint;k++) { 6604 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6605 if (row != col) { 6606 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6607 } else { 6608 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6609 } 6610 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6611 } 6612 } 6613 } 6614 if (pcbddc->dbg_flag) { 6615 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6616 } 6617 } 6618 } else { 6619 if (pcbddc->dbg_flag) { 6620 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6621 } 6622 } 6623 } 6624 6625 /* free workspace */ 6626 if (qr_needed) { 6627 if (pcbddc->dbg_flag) { 6628 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6629 } 6630 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6631 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6632 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6633 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6634 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6635 } 6636 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6637 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6638 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6639 6640 /* assembling of global change of variable */ 6641 if (!pcbddc->fake_change) { 6642 Mat tmat; 6643 PetscInt bs; 6644 6645 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6646 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6647 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6648 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6649 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6650 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6651 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6652 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6653 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6654 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6655 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6656 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6657 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6658 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6659 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6660 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6661 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6662 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6663 6664 /* check */ 6665 if (pcbddc->dbg_flag) { 6666 PetscReal error; 6667 Vec x,x_change; 6668 6669 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6670 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6671 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6672 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6673 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6674 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6675 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6676 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6677 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6678 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6679 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6680 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6681 if (error > PETSC_SMALL) { 6682 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6683 } 6684 ierr = VecDestroy(&x);CHKERRQ(ierr); 6685 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6686 } 6687 /* adapt sub_schurs computed (if any) */ 6688 if (pcbddc->use_deluxe_scaling) { 6689 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6690 6691 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6692 if (sub_schurs && sub_schurs->S_Ej_all) { 6693 Mat S_new,tmat; 6694 IS is_all_N,is_V_Sall = NULL; 6695 6696 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6697 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6698 if (pcbddc->deluxe_zerorows) { 6699 ISLocalToGlobalMapping NtoSall; 6700 IS is_V; 6701 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6702 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6703 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6704 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6705 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6706 } 6707 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6708 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6709 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6710 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6711 if (pcbddc->deluxe_zerorows) { 6712 const PetscScalar *array; 6713 const PetscInt *idxs_V,*idxs_all; 6714 PetscInt i,n_V; 6715 6716 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6717 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6718 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6719 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6720 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6721 for (i=0;i<n_V;i++) { 6722 PetscScalar val; 6723 PetscInt idx; 6724 6725 idx = idxs_V[i]; 6726 val = array[idxs_all[idxs_V[i]]]; 6727 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6728 } 6729 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6730 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6731 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6732 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6733 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6734 } 6735 sub_schurs->S_Ej_all = S_new; 6736 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6737 if (sub_schurs->sum_S_Ej_all) { 6738 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6739 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6740 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6741 if (pcbddc->deluxe_zerorows) { 6742 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6743 } 6744 sub_schurs->sum_S_Ej_all = S_new; 6745 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6746 } 6747 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6748 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6749 } 6750 /* destroy any change of basis context in sub_schurs */ 6751 if (sub_schurs && sub_schurs->change) { 6752 PetscInt i; 6753 6754 for (i=0;i<sub_schurs->n_subs;i++) { 6755 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6756 } 6757 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6758 } 6759 } 6760 if (pcbddc->switch_static) { /* need to save the local change */ 6761 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6762 } else { 6763 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6764 } 6765 /* determine if any process has changed the pressures locally */ 6766 pcbddc->change_interior = pcbddc->benign_have_null; 6767 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6768 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6769 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6770 pcbddc->use_qr_single = qr_needed; 6771 } 6772 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6773 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6774 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6775 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6776 } else { 6777 Mat benign_global = NULL; 6778 if (pcbddc->benign_have_null) { 6779 Mat tmat; 6780 6781 pcbddc->change_interior = PETSC_TRUE; 6782 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6783 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6784 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6785 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6786 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6787 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6788 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6789 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6790 if (pcbddc->benign_change) { 6791 Mat M; 6792 6793 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6794 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6795 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6796 ierr = MatDestroy(&M);CHKERRQ(ierr); 6797 } else { 6798 Mat eye; 6799 PetscScalar *array; 6800 6801 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6802 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6803 for (i=0;i<pcis->n;i++) { 6804 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6805 } 6806 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6807 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6808 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6809 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6810 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6811 } 6812 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6813 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6814 } 6815 if (pcbddc->user_ChangeOfBasisMatrix) { 6816 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6817 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6818 } else if (pcbddc->benign_have_null) { 6819 pcbddc->ChangeOfBasisMatrix = benign_global; 6820 } 6821 } 6822 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6823 IS is_global; 6824 const PetscInt *gidxs; 6825 6826 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6827 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6828 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6829 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6830 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6831 } 6832 } 6833 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6834 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6835 } 6836 6837 if (!pcbddc->fake_change) { 6838 /* add pressure dofs to set of primal nodes for numbering purposes */ 6839 for (i=0;i<pcbddc->benign_n;i++) { 6840 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6841 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6842 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6843 pcbddc->local_primal_size_cc++; 6844 pcbddc->local_primal_size++; 6845 } 6846 6847 /* check if a new primal space has been introduced (also take into account benign trick) */ 6848 pcbddc->new_primal_space_local = PETSC_TRUE; 6849 if (olocal_primal_size == pcbddc->local_primal_size) { 6850 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6851 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6852 if (!pcbddc->new_primal_space_local) { 6853 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6854 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6855 } 6856 } 6857 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6858 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6859 } 6860 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6861 6862 /* flush dbg viewer */ 6863 if (pcbddc->dbg_flag) { 6864 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6865 } 6866 6867 /* free workspace */ 6868 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6869 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6870 if (!pcbddc->adaptive_selection) { 6871 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6872 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6873 } else { 6874 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6875 pcbddc->adaptive_constraints_idxs_ptr, 6876 pcbddc->adaptive_constraints_data_ptr, 6877 pcbddc->adaptive_constraints_idxs, 6878 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6879 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6880 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6881 } 6882 PetscFunctionReturn(0); 6883 } 6884 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6885 6886 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6887 { 6888 ISLocalToGlobalMapping map; 6889 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6890 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6891 PetscInt i,N; 6892 PetscBool rcsr = PETSC_FALSE; 6893 PetscErrorCode ierr; 6894 6895 PetscFunctionBegin; 6896 if (pcbddc->recompute_topography) { 6897 pcbddc->graphanalyzed = PETSC_FALSE; 6898 /* Reset previously computed graph */ 6899 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6900 /* Init local Graph struct */ 6901 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6902 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6903 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6904 6905 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6906 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6907 } 6908 /* Check validity of the csr graph passed in by the user */ 6909 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %D, expected %D\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6910 6911 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6912 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6913 PetscInt *xadj,*adjncy; 6914 PetscInt nvtxs; 6915 PetscBool flg_row=PETSC_FALSE; 6916 6917 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6918 if (flg_row) { 6919 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6920 pcbddc->computed_rowadj = PETSC_TRUE; 6921 } 6922 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6923 rcsr = PETSC_TRUE; 6924 } 6925 if (pcbddc->dbg_flag) { 6926 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6927 } 6928 6929 if (pcbddc->mat_graph->cdim && !pcbddc->mat_graph->cloc) { 6930 PetscReal *lcoords; 6931 PetscInt n; 6932 MPI_Datatype dimrealtype; 6933 6934 if (pcbddc->mat_graph->cnloc != pc->pmat->rmap->n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pc->pmat->rmap->n); 6935 ierr = MatGetLocalSize(matis->A,&n,NULL);CHKERRQ(ierr); 6936 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 6937 ierr = PetscMalloc1(pcbddc->mat_graph->cdim*n,&lcoords);CHKERRQ(ierr); 6938 ierr = MPI_Type_contiguous(pcbddc->mat_graph->cdim,MPIU_REAL,&dimrealtype);CHKERRQ(ierr); 6939 ierr = MPI_Type_commit(&dimrealtype);CHKERRQ(ierr); 6940 ierr = PetscSFBcastBegin(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6941 ierr = PetscSFBcastEnd(matis->sf,dimrealtype,pcbddc->mat_graph->coords,lcoords);CHKERRQ(ierr); 6942 ierr = MPI_Type_free(&dimrealtype);CHKERRQ(ierr); 6943 ierr = PetscFree(pcbddc->mat_graph->coords);CHKERRQ(ierr); 6944 6945 pcbddc->mat_graph->coords = lcoords; 6946 pcbddc->mat_graph->cloc = PETSC_TRUE; 6947 pcbddc->mat_graph->cnloc = n; 6948 } 6949 if (pcbddc->mat_graph->cnloc && pcbddc->mat_graph->cnloc != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid number of local subdomain coordinates! Got %D, expected %D",pcbddc->mat_graph->cnloc,pcbddc->mat_graph->nvtxs); 6950 pcbddc->mat_graph->active_coords = (PetscBool)(pcbddc->corner_selection && !pcbddc->corner_selected); 6951 6952 /* Setup of Graph */ 6953 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6954 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6955 6956 /* attach info on disconnected subdomains if present */ 6957 if (pcbddc->n_local_subs) { 6958 PetscInt *local_subs; 6959 6960 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6961 for (i=0;i<pcbddc->n_local_subs;i++) { 6962 const PetscInt *idxs; 6963 PetscInt nl,j; 6964 6965 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6966 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6967 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6968 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6969 } 6970 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6971 pcbddc->mat_graph->local_subs = local_subs; 6972 } 6973 } 6974 6975 if (!pcbddc->graphanalyzed) { 6976 /* Graph's connected components analysis */ 6977 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6978 pcbddc->graphanalyzed = PETSC_TRUE; 6979 } 6980 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6981 PetscFunctionReturn(0); 6982 } 6983 6984 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6985 { 6986 PetscInt i,j; 6987 PetscScalar *alphas; 6988 PetscErrorCode ierr; 6989 6990 PetscFunctionBegin; 6991 if (!n) PetscFunctionReturn(0); 6992 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6993 ierr = VecNormalize(vecs[0],NULL);CHKERRQ(ierr); 6994 for (i=1;i<n;i++) { 6995 ierr = VecMDot(vecs[i],i,vecs,alphas);CHKERRQ(ierr); 6996 for (j=0;j<i;j++) alphas[j] = PetscConj(-alphas[j]); 6997 ierr = VecMAXPY(vecs[i],i,alphas,vecs);CHKERRQ(ierr); 6998 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6999 } 7000 ierr = PetscFree(alphas);CHKERRQ(ierr); 7001 PetscFunctionReturn(0); 7002 } 7003 7004 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 7005 { 7006 Mat A; 7007 PetscInt n_neighs,*neighs,*n_shared,**shared; 7008 PetscMPIInt size,rank,color; 7009 PetscInt *xadj,*adjncy; 7010 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 7011 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 7012 PetscInt void_procs,*procs_candidates = NULL; 7013 PetscInt xadj_count,*count; 7014 PetscBool ismatis,use_vwgt=PETSC_FALSE; 7015 PetscSubcomm psubcomm; 7016 MPI_Comm subcomm; 7017 PetscErrorCode ierr; 7018 7019 PetscFunctionBegin; 7020 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7021 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7022 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7023 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 7024 PetscValidLogicalCollectiveInt(mat,redprocs,3); 7025 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 7026 7027 if (have_void) *have_void = PETSC_FALSE; 7028 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 7029 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 7030 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 7031 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 7032 im_active = !!n; 7033 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7034 void_procs = size - active_procs; 7035 /* get ranks of of non-active processes in mat communicator */ 7036 if (void_procs) { 7037 PetscInt ncand; 7038 7039 if (have_void) *have_void = PETSC_TRUE; 7040 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 7041 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 7042 for (i=0,ncand=0;i<size;i++) { 7043 if (!procs_candidates[i]) { 7044 procs_candidates[ncand++] = i; 7045 } 7046 } 7047 /* force n_subdomains to be not greater that the number of non-active processes */ 7048 *n_subdomains = PetscMin(void_procs,*n_subdomains); 7049 } 7050 7051 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 7052 number of subdomains requested 1 -> send to master or first candidate in voids */ 7053 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 7054 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 7055 PetscInt issize,isidx,dest; 7056 if (*n_subdomains == 1) dest = 0; 7057 else dest = rank; 7058 if (im_active) { 7059 issize = 1; 7060 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7061 isidx = procs_candidates[dest]; 7062 } else { 7063 isidx = dest; 7064 } 7065 } else { 7066 issize = 0; 7067 isidx = -1; 7068 } 7069 if (*n_subdomains != 1) *n_subdomains = active_procs; 7070 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 7071 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7072 PetscFunctionReturn(0); 7073 } 7074 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 7075 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 7076 threshold = PetscMax(threshold,2); 7077 7078 /* Get info on mapping */ 7079 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7080 7081 /* build local CSR graph of subdomains' connectivity */ 7082 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 7083 xadj[0] = 0; 7084 xadj[1] = PetscMax(n_neighs-1,0); 7085 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 7086 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 7087 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 7088 for (i=1;i<n_neighs;i++) 7089 for (j=0;j<n_shared[i];j++) 7090 count[shared[i][j]] += 1; 7091 7092 xadj_count = 0; 7093 for (i=1;i<n_neighs;i++) { 7094 for (j=0;j<n_shared[i];j++) { 7095 if (count[shared[i][j]] < threshold) { 7096 adjncy[xadj_count] = neighs[i]; 7097 adjncy_wgt[xadj_count] = n_shared[i]; 7098 xadj_count++; 7099 break; 7100 } 7101 } 7102 } 7103 xadj[1] = xadj_count; 7104 ierr = PetscFree(count);CHKERRQ(ierr); 7105 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 7106 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7107 7108 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 7109 7110 /* Restrict work on active processes only */ 7111 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 7112 if (void_procs) { 7113 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 7114 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 7115 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 7116 subcomm = PetscSubcommChild(psubcomm); 7117 } else { 7118 psubcomm = NULL; 7119 subcomm = PetscObjectComm((PetscObject)mat); 7120 } 7121 7122 v_wgt = NULL; 7123 if (!color) { 7124 ierr = PetscFree(xadj);CHKERRQ(ierr); 7125 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7126 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7127 } else { 7128 Mat subdomain_adj; 7129 IS new_ranks,new_ranks_contig; 7130 MatPartitioning partitioner; 7131 PetscInt rstart=0,rend=0; 7132 PetscInt *is_indices,*oldranks; 7133 PetscMPIInt size; 7134 PetscBool aggregate; 7135 7136 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 7137 if (void_procs) { 7138 PetscInt prank = rank; 7139 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 7140 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 7141 for (i=0;i<xadj[1];i++) { 7142 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 7143 } 7144 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 7145 } else { 7146 oldranks = NULL; 7147 } 7148 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 7149 if (aggregate) { /* TODO: all this part could be made more efficient */ 7150 PetscInt lrows,row,ncols,*cols; 7151 PetscMPIInt nrank; 7152 PetscScalar *vals; 7153 7154 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 7155 lrows = 0; 7156 if (nrank<redprocs) { 7157 lrows = size/redprocs; 7158 if (nrank<size%redprocs) lrows++; 7159 } 7160 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 7161 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 7162 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7163 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 7164 row = nrank; 7165 ncols = xadj[1]-xadj[0]; 7166 cols = adjncy; 7167 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 7168 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 7169 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 7170 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7171 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7172 ierr = PetscFree(xadj);CHKERRQ(ierr); 7173 ierr = PetscFree(adjncy);CHKERRQ(ierr); 7174 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 7175 ierr = PetscFree(vals);CHKERRQ(ierr); 7176 if (use_vwgt) { 7177 Vec v; 7178 const PetscScalar *array; 7179 PetscInt nl; 7180 7181 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 7182 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 7183 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 7184 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 7185 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 7186 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 7187 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 7188 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 7189 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 7190 ierr = VecDestroy(&v);CHKERRQ(ierr); 7191 } 7192 } else { 7193 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 7194 if (use_vwgt) { 7195 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 7196 v_wgt[0] = n; 7197 } 7198 } 7199 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 7200 7201 /* Partition */ 7202 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 7203 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 7204 if (v_wgt) { 7205 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 7206 } 7207 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 7208 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 7209 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 7210 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 7211 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 7212 7213 /* renumber new_ranks to avoid "holes" in new set of processors */ 7214 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 7215 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 7216 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7217 if (!aggregate) { 7218 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7219 #if defined(PETSC_USE_DEBUG) 7220 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7221 #endif 7222 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 7223 } else if (oldranks) { 7224 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 7225 } else { 7226 ranks_send_to_idx[0] = is_indices[0]; 7227 } 7228 } else { 7229 PetscInt idx = 0; 7230 PetscMPIInt tag; 7231 MPI_Request *reqs; 7232 7233 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 7234 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 7235 for (i=rstart;i<rend;i++) { 7236 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 7237 } 7238 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 7239 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7240 ierr = PetscFree(reqs);CHKERRQ(ierr); 7241 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 7242 #if defined(PETSC_USE_DEBUG) 7243 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 7244 #endif 7245 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 7246 } else if (oldranks) { 7247 ranks_send_to_idx[0] = oldranks[idx]; 7248 } else { 7249 ranks_send_to_idx[0] = idx; 7250 } 7251 } 7252 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 7253 /* clean up */ 7254 ierr = PetscFree(oldranks);CHKERRQ(ierr); 7255 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 7256 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 7257 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7258 } 7259 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7260 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7261 7262 /* assemble parallel IS for sends */ 7263 i = 1; 7264 if (!color) i=0; 7265 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7266 PetscFunctionReturn(0); 7267 } 7268 7269 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7270 7271 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7272 { 7273 Mat local_mat; 7274 IS is_sends_internal; 7275 PetscInt rows,cols,new_local_rows; 7276 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7277 PetscBool ismatis,isdense,newisdense,destroy_mat; 7278 ISLocalToGlobalMapping l2gmap; 7279 PetscInt* l2gmap_indices; 7280 const PetscInt* is_indices; 7281 MatType new_local_type; 7282 /* buffers */ 7283 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7284 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7285 PetscInt *recv_buffer_idxs_local; 7286 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7287 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7288 /* MPI */ 7289 MPI_Comm comm,comm_n; 7290 PetscSubcomm subcomm; 7291 PetscMPIInt n_sends,n_recvs,commsize; 7292 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7293 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7294 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7295 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7296 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7297 PetscErrorCode ierr; 7298 7299 PetscFunctionBegin; 7300 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7301 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7302 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7303 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7304 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7305 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7306 PetscValidLogicalCollectiveBool(mat,reuse,6); 7307 PetscValidLogicalCollectiveInt(mat,nis,8); 7308 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7309 if (nvecs) { 7310 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7311 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7312 } 7313 /* further checks */ 7314 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7315 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7316 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7317 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7318 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7319 if (reuse && *mat_n) { 7320 PetscInt mrows,mcols,mnrows,mncols; 7321 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7322 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7323 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7324 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7325 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7326 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7327 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7328 } 7329 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7330 PetscValidLogicalCollectiveInt(mat,bs,0); 7331 7332 /* prepare IS for sending if not provided */ 7333 if (!is_sends) { 7334 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7335 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7336 } else { 7337 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7338 is_sends_internal = is_sends; 7339 } 7340 7341 /* get comm */ 7342 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7343 7344 /* compute number of sends */ 7345 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7346 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7347 7348 /* compute number of receives */ 7349 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7350 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7351 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7352 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7353 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7354 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7355 ierr = PetscFree(iflags);CHKERRQ(ierr); 7356 7357 /* restrict comm if requested */ 7358 subcomm = 0; 7359 destroy_mat = PETSC_FALSE; 7360 if (restrict_comm) { 7361 PetscMPIInt color,subcommsize; 7362 7363 color = 0; 7364 if (restrict_full) { 7365 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7366 } else { 7367 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7368 } 7369 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7370 subcommsize = commsize - subcommsize; 7371 /* check if reuse has been requested */ 7372 if (reuse) { 7373 if (*mat_n) { 7374 PetscMPIInt subcommsize2; 7375 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7376 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7377 comm_n = PetscObjectComm((PetscObject)*mat_n); 7378 } else { 7379 comm_n = PETSC_COMM_SELF; 7380 } 7381 } else { /* MAT_INITIAL_MATRIX */ 7382 PetscMPIInt rank; 7383 7384 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7385 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7386 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7387 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7388 comm_n = PetscSubcommChild(subcomm); 7389 } 7390 /* flag to destroy *mat_n if not significative */ 7391 if (color) destroy_mat = PETSC_TRUE; 7392 } else { 7393 comm_n = comm; 7394 } 7395 7396 /* prepare send/receive buffers */ 7397 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7398 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7399 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7400 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7401 if (nis) { 7402 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7403 } 7404 7405 /* Get data from local matrices */ 7406 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7407 /* TODO: See below some guidelines on how to prepare the local buffers */ 7408 /* 7409 send_buffer_vals should contain the raw values of the local matrix 7410 send_buffer_idxs should contain: 7411 - MatType_PRIVATE type 7412 - PetscInt size_of_l2gmap 7413 - PetscInt global_row_indices[size_of_l2gmap] 7414 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7415 */ 7416 else { 7417 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7418 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7419 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7420 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7421 send_buffer_idxs[1] = i; 7422 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7423 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7424 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7425 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7426 for (i=0;i<n_sends;i++) { 7427 ilengths_vals[is_indices[i]] = len*len; 7428 ilengths_idxs[is_indices[i]] = len+2; 7429 } 7430 } 7431 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7432 /* additional is (if any) */ 7433 if (nis) { 7434 PetscMPIInt psum; 7435 PetscInt j; 7436 for (j=0,psum=0;j<nis;j++) { 7437 PetscInt plen; 7438 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7439 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7440 psum += len+1; /* indices + lenght */ 7441 } 7442 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7443 for (j=0,psum=0;j<nis;j++) { 7444 PetscInt plen; 7445 const PetscInt *is_array_idxs; 7446 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7447 send_buffer_idxs_is[psum] = plen; 7448 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7449 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7450 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7451 psum += plen+1; /* indices + lenght */ 7452 } 7453 for (i=0;i<n_sends;i++) { 7454 ilengths_idxs_is[is_indices[i]] = psum; 7455 } 7456 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7457 } 7458 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7459 7460 buf_size_idxs = 0; 7461 buf_size_vals = 0; 7462 buf_size_idxs_is = 0; 7463 buf_size_vecs = 0; 7464 for (i=0;i<n_recvs;i++) { 7465 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7466 buf_size_vals += (PetscInt)olengths_vals[i]; 7467 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7468 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7469 } 7470 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7471 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7472 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7473 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7474 7475 /* get new tags for clean communications */ 7476 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7477 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7478 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7479 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7480 7481 /* allocate for requests */ 7482 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7483 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7484 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7485 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7486 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7487 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7488 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7489 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7490 7491 /* communications */ 7492 ptr_idxs = recv_buffer_idxs; 7493 ptr_vals = recv_buffer_vals; 7494 ptr_idxs_is = recv_buffer_idxs_is; 7495 ptr_vecs = recv_buffer_vecs; 7496 for (i=0;i<n_recvs;i++) { 7497 source_dest = onodes[i]; 7498 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7499 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7500 ptr_idxs += olengths_idxs[i]; 7501 ptr_vals += olengths_vals[i]; 7502 if (nis) { 7503 source_dest = onodes_is[i]; 7504 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7505 ptr_idxs_is += olengths_idxs_is[i]; 7506 } 7507 if (nvecs) { 7508 source_dest = onodes[i]; 7509 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7510 ptr_vecs += olengths_idxs[i]-2; 7511 } 7512 } 7513 for (i=0;i<n_sends;i++) { 7514 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7515 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7516 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7517 if (nis) { 7518 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7519 } 7520 if (nvecs) { 7521 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7522 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7523 } 7524 } 7525 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7526 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7527 7528 /* assemble new l2g map */ 7529 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7530 ptr_idxs = recv_buffer_idxs; 7531 new_local_rows = 0; 7532 for (i=0;i<n_recvs;i++) { 7533 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7534 ptr_idxs += olengths_idxs[i]; 7535 } 7536 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7537 ptr_idxs = recv_buffer_idxs; 7538 new_local_rows = 0; 7539 for (i=0;i<n_recvs;i++) { 7540 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7541 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7542 ptr_idxs += olengths_idxs[i]; 7543 } 7544 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7545 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7546 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7547 7548 /* infer new local matrix type from received local matrices type */ 7549 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7550 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7551 if (n_recvs) { 7552 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7553 ptr_idxs = recv_buffer_idxs; 7554 for (i=0;i<n_recvs;i++) { 7555 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7556 new_local_type_private = MATAIJ_PRIVATE; 7557 break; 7558 } 7559 ptr_idxs += olengths_idxs[i]; 7560 } 7561 switch (new_local_type_private) { 7562 case MATDENSE_PRIVATE: 7563 new_local_type = MATSEQAIJ; 7564 bs = 1; 7565 break; 7566 case MATAIJ_PRIVATE: 7567 new_local_type = MATSEQAIJ; 7568 bs = 1; 7569 break; 7570 case MATBAIJ_PRIVATE: 7571 new_local_type = MATSEQBAIJ; 7572 break; 7573 case MATSBAIJ_PRIVATE: 7574 new_local_type = MATSEQSBAIJ; 7575 break; 7576 default: 7577 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7578 break; 7579 } 7580 } else { /* by default, new_local_type is seqaij */ 7581 new_local_type = MATSEQAIJ; 7582 bs = 1; 7583 } 7584 7585 /* create MATIS object if needed */ 7586 if (!reuse) { 7587 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7588 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7589 } else { 7590 /* it also destroys the local matrices */ 7591 if (*mat_n) { 7592 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7593 } else { /* this is a fake object */ 7594 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7595 } 7596 } 7597 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7598 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7599 7600 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7601 7602 /* Global to local map of received indices */ 7603 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7604 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7605 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7606 7607 /* restore attributes -> type of incoming data and its size */ 7608 buf_size_idxs = 0; 7609 for (i=0;i<n_recvs;i++) { 7610 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7611 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7612 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7613 } 7614 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7615 7616 /* set preallocation */ 7617 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7618 if (!newisdense) { 7619 PetscInt *new_local_nnz=0; 7620 7621 ptr_idxs = recv_buffer_idxs_local; 7622 if (n_recvs) { 7623 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7624 } 7625 for (i=0;i<n_recvs;i++) { 7626 PetscInt j; 7627 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7628 for (j=0;j<*(ptr_idxs+1);j++) { 7629 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7630 } 7631 } else { 7632 /* TODO */ 7633 } 7634 ptr_idxs += olengths_idxs[i]; 7635 } 7636 if (new_local_nnz) { 7637 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7638 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7639 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7640 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7641 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7642 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7643 } else { 7644 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7645 } 7646 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7647 } else { 7648 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7649 } 7650 7651 /* set values */ 7652 ptr_vals = recv_buffer_vals; 7653 ptr_idxs = recv_buffer_idxs_local; 7654 for (i=0;i<n_recvs;i++) { 7655 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7656 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7657 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7658 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7659 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7660 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7661 } else { 7662 /* TODO */ 7663 } 7664 ptr_idxs += olengths_idxs[i]; 7665 ptr_vals += olengths_vals[i]; 7666 } 7667 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7668 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7669 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7670 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7671 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7672 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7673 7674 #if 0 7675 if (!restrict_comm) { /* check */ 7676 Vec lvec,rvec; 7677 PetscReal infty_error; 7678 7679 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7680 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7681 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7682 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7683 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7684 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7685 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7686 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7687 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7688 } 7689 #endif 7690 7691 /* assemble new additional is (if any) */ 7692 if (nis) { 7693 PetscInt **temp_idxs,*count_is,j,psum; 7694 7695 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7696 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7697 ptr_idxs = recv_buffer_idxs_is; 7698 psum = 0; 7699 for (i=0;i<n_recvs;i++) { 7700 for (j=0;j<nis;j++) { 7701 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7702 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7703 psum += plen; 7704 ptr_idxs += plen+1; /* shift pointer to received data */ 7705 } 7706 } 7707 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7708 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7709 for (i=1;i<nis;i++) { 7710 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7711 } 7712 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7713 ptr_idxs = recv_buffer_idxs_is; 7714 for (i=0;i<n_recvs;i++) { 7715 for (j=0;j<nis;j++) { 7716 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7717 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7718 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7719 ptr_idxs += plen+1; /* shift pointer to received data */ 7720 } 7721 } 7722 for (i=0;i<nis;i++) { 7723 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7724 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7725 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7726 } 7727 ierr = PetscFree(count_is);CHKERRQ(ierr); 7728 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7729 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7730 } 7731 /* free workspace */ 7732 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7733 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7734 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7735 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7736 if (isdense) { 7737 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7738 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7739 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7740 } else { 7741 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7742 } 7743 if (nis) { 7744 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7745 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7746 } 7747 7748 if (nvecs) { 7749 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7750 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7751 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7752 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7753 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7754 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7755 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7756 /* set values */ 7757 ptr_vals = recv_buffer_vecs; 7758 ptr_idxs = recv_buffer_idxs_local; 7759 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7760 for (i=0;i<n_recvs;i++) { 7761 PetscInt j; 7762 for (j=0;j<*(ptr_idxs+1);j++) { 7763 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7764 } 7765 ptr_idxs += olengths_idxs[i]; 7766 ptr_vals += olengths_idxs[i]-2; 7767 } 7768 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7769 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7770 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7771 } 7772 7773 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7774 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7775 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7776 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7777 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7778 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7779 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7780 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7781 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7782 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7783 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7784 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7785 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7786 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7787 ierr = PetscFree(onodes);CHKERRQ(ierr); 7788 if (nis) { 7789 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7790 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7791 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7792 } 7793 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7794 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7795 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7796 for (i=0;i<nis;i++) { 7797 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7798 } 7799 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7800 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7801 } 7802 *mat_n = NULL; 7803 } 7804 PetscFunctionReturn(0); 7805 } 7806 7807 /* temporary hack into ksp private data structure */ 7808 #include <petsc/private/kspimpl.h> 7809 7810 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7811 { 7812 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7813 PC_IS *pcis = (PC_IS*)pc->data; 7814 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7815 Mat coarsedivudotp = NULL; 7816 Mat coarseG,t_coarse_mat_is; 7817 MatNullSpace CoarseNullSpace = NULL; 7818 ISLocalToGlobalMapping coarse_islg; 7819 IS coarse_is,*isarray; 7820 PetscInt i,im_active=-1,active_procs=-1; 7821 PetscInt nis,nisdofs,nisneu,nisvert; 7822 PC pc_temp; 7823 PCType coarse_pc_type; 7824 KSPType coarse_ksp_type; 7825 PetscBool multilevel_requested,multilevel_allowed; 7826 PetscBool coarse_reuse; 7827 PetscInt ncoarse,nedcfield; 7828 PetscBool compute_vecs = PETSC_FALSE; 7829 PetscScalar *array; 7830 MatReuse coarse_mat_reuse; 7831 PetscBool restr, full_restr, have_void; 7832 PetscMPIInt commsize; 7833 PetscErrorCode ierr; 7834 7835 PetscFunctionBegin; 7836 /* Assign global numbering to coarse dofs */ 7837 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7838 PetscInt ocoarse_size; 7839 compute_vecs = PETSC_TRUE; 7840 7841 pcbddc->new_primal_space = PETSC_TRUE; 7842 ocoarse_size = pcbddc->coarse_size; 7843 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7844 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7845 /* see if we can avoid some work */ 7846 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7847 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7848 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7849 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7850 coarse_reuse = PETSC_FALSE; 7851 } else { /* we can safely reuse already computed coarse matrix */ 7852 coarse_reuse = PETSC_TRUE; 7853 } 7854 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7855 coarse_reuse = PETSC_FALSE; 7856 } 7857 /* reset any subassembling information */ 7858 if (!coarse_reuse || pcbddc->recompute_topography) { 7859 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7860 } 7861 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7862 coarse_reuse = PETSC_TRUE; 7863 } 7864 /* assemble coarse matrix */ 7865 if (coarse_reuse && pcbddc->coarse_ksp) { 7866 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7867 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7868 coarse_mat_reuse = MAT_REUSE_MATRIX; 7869 } else { 7870 coarse_mat = NULL; 7871 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7872 } 7873 7874 /* creates temporary l2gmap and IS for coarse indexes */ 7875 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7876 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7877 7878 /* creates temporary MATIS object for coarse matrix */ 7879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7880 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7881 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7882 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7883 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7884 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7885 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7886 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7887 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7888 7889 /* count "active" (i.e. with positive local size) and "void" processes */ 7890 im_active = !!(pcis->n); 7891 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7892 7893 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7894 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7895 /* full_restr : just use the receivers from the subassembling pattern */ 7896 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7897 coarse_mat_is = NULL; 7898 multilevel_allowed = PETSC_FALSE; 7899 multilevel_requested = PETSC_FALSE; 7900 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7901 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7902 if (multilevel_requested) { 7903 ncoarse = active_procs/pcbddc->coarsening_ratio; 7904 restr = PETSC_FALSE; 7905 full_restr = PETSC_FALSE; 7906 } else { 7907 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7908 restr = PETSC_TRUE; 7909 full_restr = PETSC_TRUE; 7910 } 7911 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7912 ncoarse = PetscMax(1,ncoarse); 7913 if (!pcbddc->coarse_subassembling) { 7914 if (pcbddc->coarsening_ratio > 1) { 7915 if (multilevel_requested) { 7916 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7917 } else { 7918 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7919 } 7920 } else { 7921 PetscMPIInt rank; 7922 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7923 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7924 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7925 } 7926 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7927 PetscInt psum; 7928 if (pcbddc->coarse_ksp) psum = 1; 7929 else psum = 0; 7930 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7931 if (ncoarse < commsize) have_void = PETSC_TRUE; 7932 } 7933 /* determine if we can go multilevel */ 7934 if (multilevel_requested) { 7935 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7936 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7937 } 7938 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7939 7940 /* dump subassembling pattern */ 7941 if (pcbddc->dbg_flag && multilevel_allowed) { 7942 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7943 } 7944 7945 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7946 nedcfield = -1; 7947 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7948 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7949 const PetscInt *idxs; 7950 ISLocalToGlobalMapping tmap; 7951 7952 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7953 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7954 /* allocate space for temporary storage */ 7955 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7956 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7957 /* allocate for IS array */ 7958 nisdofs = pcbddc->n_ISForDofsLocal; 7959 if (pcbddc->nedclocal) { 7960 if (pcbddc->nedfield > -1) { 7961 nedcfield = pcbddc->nedfield; 7962 } else { 7963 nedcfield = 0; 7964 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7965 nisdofs = 1; 7966 } 7967 } 7968 nisneu = !!pcbddc->NeumannBoundariesLocal; 7969 nisvert = 0; /* nisvert is not used */ 7970 nis = nisdofs + nisneu + nisvert; 7971 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7972 /* dofs splitting */ 7973 for (i=0;i<nisdofs;i++) { 7974 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7975 if (nedcfield != i) { 7976 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7977 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7978 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7979 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7980 } else { 7981 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7982 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7983 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7984 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7985 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7986 } 7987 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7988 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7989 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7990 } 7991 /* neumann boundaries */ 7992 if (pcbddc->NeumannBoundariesLocal) { 7993 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7994 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7995 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7996 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7997 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7998 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7999 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 8000 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 8001 } 8002 /* free memory */ 8003 ierr = PetscFree(tidxs);CHKERRQ(ierr); 8004 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 8005 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 8006 } else { 8007 nis = 0; 8008 nisdofs = 0; 8009 nisneu = 0; 8010 nisvert = 0; 8011 isarray = NULL; 8012 } 8013 /* destroy no longer needed map */ 8014 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 8015 8016 /* subassemble */ 8017 if (multilevel_allowed) { 8018 Vec vp[1]; 8019 PetscInt nvecs = 0; 8020 PetscBool reuse,reuser; 8021 8022 if (coarse_mat) reuse = PETSC_TRUE; 8023 else reuse = PETSC_FALSE; 8024 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8025 vp[0] = NULL; 8026 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 8027 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 8028 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 8029 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 8030 nvecs = 1; 8031 8032 if (pcbddc->divudotp) { 8033 Mat B,loc_divudotp; 8034 Vec v,p; 8035 IS dummy; 8036 PetscInt np; 8037 8038 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 8039 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 8040 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 8041 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 8042 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 8043 ierr = VecSet(p,1.);CHKERRQ(ierr); 8044 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 8045 ierr = VecDestroy(&p);CHKERRQ(ierr); 8046 ierr = MatDestroy(&B);CHKERRQ(ierr); 8047 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 8048 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 8049 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 8050 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 8051 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 8052 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8053 ierr = VecDestroy(&v);CHKERRQ(ierr); 8054 } 8055 } 8056 if (reuser) { 8057 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8058 } else { 8059 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 8060 } 8061 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 8062 PetscScalar *arraym,*arrayv; 8063 PetscInt nl; 8064 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 8065 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 8066 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8067 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 8068 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 8069 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 8070 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 8071 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 8072 } else { 8073 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 8074 } 8075 } else { 8076 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 8077 } 8078 if (coarse_mat_is || coarse_mat) { 8079 PetscMPIInt size; 8080 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 8081 if (!multilevel_allowed) { 8082 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 8083 } else { 8084 Mat A; 8085 8086 /* if this matrix is present, it means we are not reusing the coarse matrix */ 8087 if (coarse_mat_is) { 8088 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 8089 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 8090 coarse_mat = coarse_mat_is; 8091 } 8092 /* be sure we don't have MatSeqDENSE as local mat */ 8093 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 8094 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 8095 } 8096 } 8097 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 8098 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 8099 8100 /* create local to global scatters for coarse problem */ 8101 if (compute_vecs) { 8102 PetscInt lrows; 8103 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 8104 if (coarse_mat) { 8105 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 8106 } else { 8107 lrows = 0; 8108 } 8109 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 8110 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 8111 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 8112 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8113 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 8114 } 8115 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 8116 8117 /* set defaults for coarse KSP and PC */ 8118 if (multilevel_allowed) { 8119 coarse_ksp_type = KSPRICHARDSON; 8120 coarse_pc_type = PCBDDC; 8121 } else { 8122 coarse_ksp_type = KSPPREONLY; 8123 coarse_pc_type = PCREDUNDANT; 8124 } 8125 8126 /* print some info if requested */ 8127 if (pcbddc->dbg_flag) { 8128 if (!multilevel_allowed) { 8129 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8130 if (multilevel_requested) { 8131 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8132 } else if (pcbddc->max_levels) { 8133 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 8134 } 8135 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8136 } 8137 } 8138 8139 /* communicate coarse discrete gradient */ 8140 coarseG = NULL; 8141 if (pcbddc->nedcG && multilevel_allowed) { 8142 MPI_Comm ccomm; 8143 if (coarse_mat) { 8144 ccomm = PetscObjectComm((PetscObject)coarse_mat); 8145 } else { 8146 ccomm = MPI_COMM_NULL; 8147 } 8148 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 8149 } 8150 8151 /* create the coarse KSP object only once with defaults */ 8152 if (coarse_mat) { 8153 PetscBool isredundant,isnn,isbddc; 8154 PetscViewer dbg_viewer = NULL; 8155 8156 if (pcbddc->dbg_flag) { 8157 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 8158 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8159 } 8160 if (!pcbddc->coarse_ksp) { 8161 char prefix[256],str_level[16]; 8162 size_t len; 8163 8164 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 8165 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8166 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 8167 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 8168 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8169 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 8170 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 8171 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8172 /* TODO is this logic correct? should check for coarse_mat type */ 8173 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8174 /* prefix */ 8175 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 8176 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 8177 if (!pcbddc->current_level) { 8178 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 8179 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 8180 } else { 8181 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 8182 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 8183 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 8184 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 8185 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 8186 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 8187 } 8188 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 8189 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8190 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 8191 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 8192 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 8193 /* allow user customization */ 8194 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 8195 } 8196 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 8197 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 8198 if (nisdofs) { 8199 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 8200 for (i=0;i<nisdofs;i++) { 8201 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 8202 } 8203 } 8204 if (nisneu) { 8205 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 8206 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 8207 } 8208 if (nisvert) { 8209 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 8210 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 8211 } 8212 if (coarseG) { 8213 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 8214 } 8215 8216 /* get some info after set from options */ 8217 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8218 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 8219 if (isbddc && !multilevel_allowed) { 8220 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 8221 isbddc = PETSC_FALSE; 8222 } 8223 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 8224 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 8225 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 8226 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 8227 isbddc = PETSC_TRUE; 8228 } 8229 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 8230 if (isredundant) { 8231 KSP inner_ksp; 8232 PC inner_pc; 8233 8234 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 8235 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 8236 } 8237 8238 /* parameters which miss an API */ 8239 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 8240 if (isbddc) { 8241 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 8242 8243 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 8244 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 8245 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 8246 if (pcbddc_coarse->benign_saddle_point) { 8247 Mat coarsedivudotp_is; 8248 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 8249 IS row,col; 8250 const PetscInt *gidxs; 8251 PetscInt n,st,M,N; 8252 8253 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 8254 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 8255 st = st-n; 8256 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 8257 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8258 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8259 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8260 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8261 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8262 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8263 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8264 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8265 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8266 ierr = ISDestroy(&row);CHKERRQ(ierr); 8267 ierr = ISDestroy(&col);CHKERRQ(ierr); 8268 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8269 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8270 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8271 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8272 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8273 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8274 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8275 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8276 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8277 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8278 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8279 if (pcbddc->adaptive_threshold[0] == 0.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8280 } 8281 } 8282 8283 /* propagate symmetry info of coarse matrix */ 8284 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8285 if (pc->pmat->symmetric_set) { 8286 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8287 } 8288 if (pc->pmat->hermitian_set) { 8289 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8290 } 8291 if (pc->pmat->spd_set) { 8292 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8293 } 8294 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8295 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8296 } 8297 /* set operators */ 8298 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8299 if (pcbddc->dbg_flag) { 8300 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8301 } 8302 } 8303 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8304 ierr = PetscFree(isarray);CHKERRQ(ierr); 8305 #if 0 8306 { 8307 PetscViewer viewer; 8308 char filename[256]; 8309 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8310 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8311 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8312 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8313 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8314 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8315 } 8316 #endif 8317 8318 if (pcbddc->coarse_ksp) { 8319 Vec crhs,csol; 8320 8321 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8322 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8323 if (!csol) { 8324 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8325 } 8326 if (!crhs) { 8327 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8328 } 8329 } 8330 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8331 8332 /* compute null space for coarse solver if the benign trick has been requested */ 8333 if (pcbddc->benign_null) { 8334 8335 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8336 for (i=0;i<pcbddc->benign_n;i++) { 8337 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8338 } 8339 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8340 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8341 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8342 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8343 if (coarse_mat) { 8344 Vec nullv; 8345 PetscScalar *array,*array2; 8346 PetscInt nl; 8347 8348 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8349 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8350 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8351 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8352 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8353 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8354 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8355 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8356 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8357 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8358 } 8359 } 8360 8361 if (pcbddc->coarse_ksp) { 8362 PetscBool ispreonly; 8363 8364 if (CoarseNullSpace) { 8365 PetscBool isnull; 8366 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8367 if (isnull) { 8368 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8369 } 8370 /* TODO: add local nullspaces (if any) */ 8371 } 8372 /* setup coarse ksp */ 8373 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8374 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8375 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8376 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8377 KSP check_ksp; 8378 KSPType check_ksp_type; 8379 PC check_pc; 8380 Vec check_vec,coarse_vec; 8381 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8382 PetscInt its; 8383 PetscBool compute_eigs; 8384 PetscReal *eigs_r,*eigs_c; 8385 PetscInt neigs; 8386 const char *prefix; 8387 8388 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8389 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8390 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8391 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8392 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8393 /* prevent from setup unneeded object */ 8394 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8395 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8396 if (ispreonly) { 8397 check_ksp_type = KSPPREONLY; 8398 compute_eigs = PETSC_FALSE; 8399 } else { 8400 check_ksp_type = KSPGMRES; 8401 compute_eigs = PETSC_TRUE; 8402 } 8403 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8404 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8405 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8406 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8407 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8408 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8409 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8410 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8411 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8412 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8413 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8414 /* create random vec */ 8415 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8416 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8417 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8418 /* solve coarse problem */ 8419 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8420 /* set eigenvalue estimation if preonly has not been requested */ 8421 if (compute_eigs) { 8422 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8423 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8424 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8425 if (neigs) { 8426 lambda_max = eigs_r[neigs-1]; 8427 lambda_min = eigs_r[0]; 8428 if (pcbddc->use_coarse_estimates) { 8429 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8430 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8431 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8432 } 8433 } 8434 } 8435 } 8436 8437 /* check coarse problem residual error */ 8438 if (pcbddc->dbg_flag) { 8439 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8440 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8441 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8442 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8443 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8444 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8445 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8446 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8447 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8448 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8449 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8450 if (CoarseNullSpace) { 8451 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8452 } 8453 if (compute_eigs) { 8454 PetscReal lambda_max_s,lambda_min_s; 8455 KSPConvergedReason reason; 8456 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8457 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8458 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8459 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8460 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8461 for (i=0;i<neigs;i++) { 8462 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8463 } 8464 } 8465 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8466 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8467 } 8468 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8469 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8470 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8471 if (compute_eigs) { 8472 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8473 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8474 } 8475 } 8476 } 8477 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8478 /* print additional info */ 8479 if (pcbddc->dbg_flag) { 8480 /* waits until all processes reaches this point */ 8481 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8482 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8483 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8484 } 8485 8486 /* free memory */ 8487 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8488 PetscFunctionReturn(0); 8489 } 8490 8491 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8492 { 8493 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8494 PC_IS* pcis = (PC_IS*)pc->data; 8495 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8496 IS subset,subset_mult,subset_n; 8497 PetscInt local_size,coarse_size=0; 8498 PetscInt *local_primal_indices=NULL; 8499 const PetscInt *t_local_primal_indices; 8500 PetscErrorCode ierr; 8501 8502 PetscFunctionBegin; 8503 /* Compute global number of coarse dofs */ 8504 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8505 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8506 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8507 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8508 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8509 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8510 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8511 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8512 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8513 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8514 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8515 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8516 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8517 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8518 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8519 8520 /* check numbering */ 8521 if (pcbddc->dbg_flag) { 8522 PetscScalar coarsesum,*array,*array2; 8523 PetscInt i; 8524 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8525 8526 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8527 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8528 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8529 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8530 /* counter */ 8531 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8532 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8533 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8534 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8535 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8536 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8537 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8538 for (i=0;i<pcbddc->local_primal_size;i++) { 8539 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8540 } 8541 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8542 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8543 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8544 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8545 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8546 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8547 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8548 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8549 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8550 for (i=0;i<pcis->n;i++) { 8551 if (array[i] != 0.0 && array[i] != array2[i]) { 8552 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8553 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8554 set_error = PETSC_TRUE; 8555 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8556 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8557 } 8558 } 8559 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8560 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8561 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8562 for (i=0;i<pcis->n;i++) { 8563 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8564 } 8565 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8566 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8567 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8568 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8569 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8570 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8571 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8572 PetscInt *gidxs; 8573 8574 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8575 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8576 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8577 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8579 for (i=0;i<pcbddc->local_primal_size;i++) { 8580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8581 } 8582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8583 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8584 } 8585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8586 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8587 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8588 } 8589 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8590 /* get back data */ 8591 *coarse_size_n = coarse_size; 8592 *local_primal_indices_n = local_primal_indices; 8593 PetscFunctionReturn(0); 8594 } 8595 8596 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8597 { 8598 IS localis_t; 8599 PetscInt i,lsize,*idxs,n; 8600 PetscScalar *vals; 8601 PetscErrorCode ierr; 8602 8603 PetscFunctionBegin; 8604 /* get indices in local ordering exploiting local to global map */ 8605 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8606 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8607 for (i=0;i<lsize;i++) vals[i] = 1.0; 8608 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8609 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8610 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8611 if (idxs) { /* multilevel guard */ 8612 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8613 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8614 } 8615 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8616 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8617 ierr = PetscFree(vals);CHKERRQ(ierr); 8618 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8619 /* now compute set in local ordering */ 8620 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8621 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8622 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8623 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8624 for (i=0,lsize=0;i<n;i++) { 8625 if (PetscRealPart(vals[i]) > 0.5) { 8626 lsize++; 8627 } 8628 } 8629 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8630 for (i=0,lsize=0;i<n;i++) { 8631 if (PetscRealPart(vals[i]) > 0.5) { 8632 idxs[lsize++] = i; 8633 } 8634 } 8635 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8636 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8637 *localis = localis_t; 8638 PetscFunctionReturn(0); 8639 } 8640 8641 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8642 { 8643 PC_IS *pcis=(PC_IS*)pc->data; 8644 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8645 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8646 Mat S_j; 8647 PetscInt *used_xadj,*used_adjncy; 8648 PetscBool free_used_adj; 8649 PetscErrorCode ierr; 8650 8651 PetscFunctionBegin; 8652 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8653 free_used_adj = PETSC_FALSE; 8654 if (pcbddc->sub_schurs_layers == -1) { 8655 used_xadj = NULL; 8656 used_adjncy = NULL; 8657 } else { 8658 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8659 used_xadj = pcbddc->mat_graph->xadj; 8660 used_adjncy = pcbddc->mat_graph->adjncy; 8661 } else if (pcbddc->computed_rowadj) { 8662 used_xadj = pcbddc->mat_graph->xadj; 8663 used_adjncy = pcbddc->mat_graph->adjncy; 8664 } else { 8665 PetscBool flg_row=PETSC_FALSE; 8666 const PetscInt *xadj,*adjncy; 8667 PetscInt nvtxs; 8668 8669 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8670 if (flg_row) { 8671 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8672 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8673 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8674 free_used_adj = PETSC_TRUE; 8675 } else { 8676 pcbddc->sub_schurs_layers = -1; 8677 used_xadj = NULL; 8678 used_adjncy = NULL; 8679 } 8680 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8681 } 8682 } 8683 8684 /* setup sub_schurs data */ 8685 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8686 if (!sub_schurs->schur_explicit) { 8687 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8688 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8689 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8690 } else { 8691 Mat change = NULL; 8692 Vec scaling = NULL; 8693 IS change_primal = NULL, iP; 8694 PetscInt benign_n; 8695 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8696 PetscBool isseqaij,need_change = PETSC_FALSE; 8697 PetscBool discrete_harmonic = PETSC_FALSE; 8698 8699 if (!pcbddc->use_vertices && reuse_solvers) { 8700 PetscInt n_vertices; 8701 8702 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8703 reuse_solvers = (PetscBool)!n_vertices; 8704 } 8705 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8706 if (!isseqaij) { 8707 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8708 if (matis->A == pcbddc->local_mat) { 8709 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8710 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8711 } else { 8712 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8713 } 8714 } 8715 if (!pcbddc->benign_change_explicit) { 8716 benign_n = pcbddc->benign_n; 8717 } else { 8718 benign_n = 0; 8719 } 8720 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8721 We need a global reduction to avoid possible deadlocks. 8722 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8723 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8724 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8725 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8726 need_change = (PetscBool)(!need_change); 8727 } 8728 /* If the user defines additional constraints, we import them here. 8729 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8730 if (need_change) { 8731 PC_IS *pcisf; 8732 PC_BDDC *pcbddcf; 8733 PC pcf; 8734 8735 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8736 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8737 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8738 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8739 8740 /* hacks */ 8741 pcisf = (PC_IS*)pcf->data; 8742 pcisf->is_B_local = pcis->is_B_local; 8743 pcisf->vec1_N = pcis->vec1_N; 8744 pcisf->BtoNmap = pcis->BtoNmap; 8745 pcisf->n = pcis->n; 8746 pcisf->n_B = pcis->n_B; 8747 pcbddcf = (PC_BDDC*)pcf->data; 8748 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8749 pcbddcf->mat_graph = pcbddc->mat_graph; 8750 pcbddcf->use_faces = PETSC_TRUE; 8751 pcbddcf->use_change_of_basis = PETSC_TRUE; 8752 pcbddcf->use_change_on_faces = PETSC_TRUE; 8753 pcbddcf->use_qr_single = PETSC_TRUE; 8754 pcbddcf->fake_change = PETSC_TRUE; 8755 8756 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8757 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8758 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8759 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8760 change = pcbddcf->ConstraintMatrix; 8761 pcbddcf->ConstraintMatrix = NULL; 8762 8763 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8764 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8765 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8766 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8767 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8768 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8769 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8770 pcf->ops->destroy = NULL; 8771 pcf->ops->reset = NULL; 8772 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8773 } 8774 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8775 8776 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8777 if (iP) { 8778 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8779 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8780 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8781 } 8782 if (discrete_harmonic) { 8783 Mat A; 8784 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8785 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8786 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8787 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8788 ierr = MatDestroy(&A);CHKERRQ(ierr); 8789 } else { 8790 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8791 } 8792 ierr = MatDestroy(&change);CHKERRQ(ierr); 8793 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8794 } 8795 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8796 8797 /* free adjacency */ 8798 if (free_used_adj) { 8799 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8800 } 8801 PetscFunctionReturn(0); 8802 } 8803 8804 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8805 { 8806 PC_IS *pcis=(PC_IS*)pc->data; 8807 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8808 PCBDDCGraph graph; 8809 PetscErrorCode ierr; 8810 8811 PetscFunctionBegin; 8812 /* attach interface graph for determining subsets */ 8813 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8814 IS verticesIS,verticescomm; 8815 PetscInt vsize,*idxs; 8816 8817 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8818 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8819 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8820 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8821 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8822 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8823 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8824 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8825 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8826 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8827 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8828 } else { 8829 graph = pcbddc->mat_graph; 8830 } 8831 /* print some info */ 8832 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8833 IS vertices; 8834 PetscInt nv,nedges,nfaces; 8835 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8836 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8837 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8838 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8839 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8840 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8841 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8842 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8843 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8844 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8845 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8846 } 8847 8848 /* sub_schurs init */ 8849 if (!pcbddc->sub_schurs) { 8850 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8851 } 8852 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8853 8854 /* free graph struct */ 8855 if (pcbddc->sub_schurs_rebuild) { 8856 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8857 } 8858 PetscFunctionReturn(0); 8859 } 8860 8861 PetscErrorCode PCBDDCCheckOperator(PC pc) 8862 { 8863 PC_IS *pcis=(PC_IS*)pc->data; 8864 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8865 PetscErrorCode ierr; 8866 8867 PetscFunctionBegin; 8868 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8869 IS zerodiag = NULL; 8870 Mat S_j,B0_B=NULL; 8871 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8872 PetscScalar *p0_check,*array,*array2; 8873 PetscReal norm; 8874 PetscInt i; 8875 8876 /* B0 and B0_B */ 8877 if (zerodiag) { 8878 IS dummy; 8879 8880 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8881 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8882 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8883 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8884 } 8885 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8886 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8887 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8888 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8889 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8890 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8891 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8892 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8893 /* S_j */ 8894 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8895 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8896 8897 /* mimic vector in \widetilde{W}_\Gamma */ 8898 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8899 /* continuous in primal space */ 8900 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8901 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8902 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8903 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8904 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8905 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8906 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8907 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8908 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8909 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8910 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8911 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8912 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8913 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8914 8915 /* assemble rhs for coarse problem */ 8916 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8917 /* local with Schur */ 8918 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8919 if (zerodiag) { 8920 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8921 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8922 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8923 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8924 } 8925 /* sum on primal nodes the local contributions */ 8926 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8927 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8928 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8929 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8930 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8931 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8932 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8933 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8934 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8935 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8936 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8937 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8938 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8939 /* scale primal nodes (BDDC sums contibutions) */ 8940 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8941 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8942 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8943 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8944 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8945 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8946 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8947 /* global: \widetilde{B0}_B w_\Gamma */ 8948 if (zerodiag) { 8949 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8950 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8951 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8952 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8953 } 8954 /* BDDC */ 8955 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8956 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8957 8958 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8959 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8960 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8961 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8962 for (i=0;i<pcbddc->benign_n;i++) { 8963 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8964 } 8965 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8966 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8967 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8968 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8969 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8970 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8971 } 8972 PetscFunctionReturn(0); 8973 } 8974 8975 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8976 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8977 { 8978 Mat At; 8979 IS rows; 8980 PetscInt rst,ren; 8981 PetscErrorCode ierr; 8982 PetscLayout rmap; 8983 8984 PetscFunctionBegin; 8985 rst = ren = 0; 8986 if (ccomm != MPI_COMM_NULL) { 8987 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8988 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8989 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8990 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8991 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8992 } 8993 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8994 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8995 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8996 8997 if (ccomm != MPI_COMM_NULL) { 8998 Mat_MPIAIJ *a,*b; 8999 IS from,to; 9000 Vec gvec; 9001 PetscInt lsize; 9002 9003 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 9004 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 9005 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 9006 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 9007 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 9008 a = (Mat_MPIAIJ*)At->data; 9009 b = (Mat_MPIAIJ*)(*B)->data; 9010 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 9011 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 9012 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 9013 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 9014 b->A = a->A; 9015 b->B = a->B; 9016 9017 b->donotstash = a->donotstash; 9018 b->roworiented = a->roworiented; 9019 b->rowindices = 0; 9020 b->rowvalues = 0; 9021 b->getrowactive = PETSC_FALSE; 9022 9023 (*B)->rmap = rmap; 9024 (*B)->factortype = A->factortype; 9025 (*B)->assembled = PETSC_TRUE; 9026 (*B)->insertmode = NOT_SET_VALUES; 9027 (*B)->preallocated = PETSC_TRUE; 9028 9029 if (a->colmap) { 9030 #if defined(PETSC_USE_CTABLE) 9031 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 9032 #else 9033 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 9034 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9035 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 9036 #endif 9037 } else b->colmap = 0; 9038 if (a->garray) { 9039 PetscInt len; 9040 len = a->B->cmap->n; 9041 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 9042 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 9043 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 9044 } else b->garray = 0; 9045 9046 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 9047 b->lvec = a->lvec; 9048 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 9049 9050 /* cannot use VecScatterCopy */ 9051 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 9052 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 9053 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 9054 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 9055 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 9056 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 9057 ierr = ISDestroy(&from);CHKERRQ(ierr); 9058 ierr = ISDestroy(&to);CHKERRQ(ierr); 9059 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 9060 } 9061 ierr = MatDestroy(&At);CHKERRQ(ierr); 9062 PetscFunctionReturn(0); 9063 } 9064