1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,maxsize; 1520 PetscInt n_neigh,*neigh,*n_shared,**shared; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = ISLocalToGlobalMappingGetInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1526 ierr = MPIU_Allreduce(&n_neigh,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1527 maxsize = 0; 1528 for (i=0;i<n_neigh;i++) maxsize = PetscMax(n_shared[i],maxsize); 1529 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1530 /* create vectors to hold quadrature weights */ 1531 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1532 if (!transpose) { 1533 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1534 } else { 1535 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1536 } 1537 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1538 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1539 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1540 for (i=0;i<maxneighs;i++) { 1541 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1542 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1543 } 1544 1545 /* compute local quad vec */ 1546 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1547 if (!transpose) { 1548 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1549 } else { 1550 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1551 } 1552 ierr = VecSet(p,1.);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1557 } 1558 if (vl2l) { 1559 Mat lA; 1560 VecScatter sc; 1561 1562 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1563 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1564 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1565 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1566 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1567 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1568 } else { 1569 vins = v; 1570 } 1571 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1572 ierr = VecDestroy(&p);CHKERRQ(ierr); 1573 1574 /* insert in global quadrature vecs */ 1575 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1576 for (i=0;i<n_neigh;i++) { 1577 const PetscInt *idxs; 1578 PetscInt idx,nn,j; 1579 1580 idxs = shared[i]; 1581 nn = n_shared[i]; 1582 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1583 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1584 idx = -(idx+1); 1585 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1586 } 1587 ierr = ISLocalToGlobalMappingRestoreInfo(graph->l2gmap,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 1588 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1589 if (vl2l) { 1590 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1591 } 1592 ierr = VecDestroy(&v);CHKERRQ(ierr); 1593 ierr = PetscFree(vals);CHKERRQ(ierr); 1594 1595 /* assemble near null space */ 1596 for (i=0;i<maxneighs;i++) { 1597 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1598 } 1599 for (i=0;i<maxneighs;i++) { 1600 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1601 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1602 } 1603 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1604 PetscFunctionReturn(0); 1605 } 1606 1607 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1608 { 1609 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1610 PetscErrorCode ierr; 1611 1612 PetscFunctionBegin; 1613 if (primalv) { 1614 if (pcbddc->user_primal_vertices_local) { 1615 IS list[2], newp; 1616 1617 list[0] = primalv; 1618 list[1] = pcbddc->user_primal_vertices_local; 1619 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1620 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1621 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1622 pcbddc->user_primal_vertices_local = newp; 1623 } else { 1624 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1625 } 1626 } 1627 PetscFunctionReturn(0); 1628 } 1629 1630 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1631 { 1632 PetscErrorCode ierr; 1633 Vec local,global; 1634 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1635 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1636 PetscBool monolithic = PETSC_FALSE; 1637 1638 PetscFunctionBegin; 1639 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1640 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1641 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1642 /* need to convert from global to local topology information and remove references to information in global ordering */ 1643 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1644 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1645 if (monolithic) { /* just get block size to properly compute vertices */ 1646 if (pcbddc->vertex_size == 1) { 1647 ierr = MatGetBlockSize(pc->pmat,&pcbddc->vertex_size);CHKERRQ(ierr); 1648 } 1649 goto boundary; 1650 } 1651 1652 if (pcbddc->user_provided_isfordofs) { 1653 if (pcbddc->n_ISForDofs) { 1654 PetscInt i; 1655 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1656 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1657 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1658 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1659 } 1660 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1661 pcbddc->n_ISForDofs = 0; 1662 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1663 } 1664 } else { 1665 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1666 DM dm; 1667 1668 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1669 if (!dm) { 1670 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1671 } 1672 if (dm) { 1673 IS *fields; 1674 PetscInt nf,i; 1675 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1676 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1677 for (i=0;i<nf;i++) { 1678 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1679 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1680 } 1681 ierr = PetscFree(fields);CHKERRQ(ierr); 1682 pcbddc->n_ISForDofsLocal = nf; 1683 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1684 PetscContainer c; 1685 1686 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1687 if (c) { 1688 MatISLocalFields lf; 1689 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1690 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1691 } else { /* fallback, create the default fields if bs > 1 */ 1692 PetscInt i, n = matis->A->rmap->n; 1693 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1694 if (i > 1) { 1695 pcbddc->n_ISForDofsLocal = i; 1696 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1697 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1698 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1699 } 1700 } 1701 } 1702 } 1703 } else { 1704 PetscInt i; 1705 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1706 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1707 } 1708 } 1709 } 1710 1711 boundary: 1712 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1713 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1714 } else if (pcbddc->DirichletBoundariesLocal) { 1715 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1716 } 1717 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1718 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1719 } else if (pcbddc->NeumannBoundariesLocal) { 1720 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1721 } 1722 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1723 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1724 } 1725 ierr = VecDestroy(&global);CHKERRQ(ierr); 1726 ierr = VecDestroy(&local);CHKERRQ(ierr); 1727 /* detect local disconnected subdomains if requested (use matis->A) */ 1728 if (pcbddc->detect_disconnected) { 1729 IS primalv = NULL; 1730 PetscInt i; 1731 1732 for (i=0;i<pcbddc->n_local_subs;i++) { 1733 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1734 } 1735 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1736 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1737 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1738 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1739 } 1740 /* early stage corner detection */ 1741 { 1742 DM dm; 1743 1744 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1745 if (dm) { 1746 PetscBool isda; 1747 1748 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1749 if (isda) { 1750 ISLocalToGlobalMapping l2l; 1751 IS corners; 1752 Mat lA; 1753 1754 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1755 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1756 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1757 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1758 if (l2l) { 1759 const PetscInt *idx; 1760 PetscInt bs,*idxout,n; 1761 1762 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1763 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1764 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1765 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1766 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1767 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1768 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1769 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1770 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1771 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1772 } else { /* not from DMDA */ 1773 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1774 } 1775 } 1776 } 1777 } 1778 PetscFunctionReturn(0); 1779 } 1780 1781 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1782 { 1783 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1784 PetscErrorCode ierr; 1785 IS nis; 1786 const PetscInt *idxs; 1787 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1788 PetscBool *ld; 1789 1790 PetscFunctionBegin; 1791 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1792 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1793 if (mop == MPI_LAND) { 1794 /* init rootdata with true */ 1795 ld = (PetscBool*) matis->sf_rootdata; 1796 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1797 } else { 1798 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1799 } 1800 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1801 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1802 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1803 ld = (PetscBool*) matis->sf_leafdata; 1804 for (i=0;i<nd;i++) 1805 if (-1 < idxs[i] && idxs[i] < n) 1806 ld[idxs[i]] = PETSC_TRUE; 1807 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1808 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1809 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1810 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1811 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1812 if (mop == MPI_LAND) { 1813 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1814 } else { 1815 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1816 } 1817 for (i=0,nnd=0;i<n;i++) 1818 if (ld[i]) 1819 nidxs[nnd++] = i; 1820 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1821 ierr = ISDestroy(is);CHKERRQ(ierr); 1822 *is = nis; 1823 PetscFunctionReturn(0); 1824 } 1825 1826 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1827 { 1828 PC_IS *pcis = (PC_IS*)(pc->data); 1829 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1830 PetscErrorCode ierr; 1831 1832 PetscFunctionBegin; 1833 if (!pcbddc->benign_have_null) { 1834 PetscFunctionReturn(0); 1835 } 1836 if (pcbddc->ChangeOfBasisMatrix) { 1837 Vec swap; 1838 1839 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1840 swap = pcbddc->work_change; 1841 pcbddc->work_change = r; 1842 r = swap; 1843 } 1844 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1845 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1846 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1847 ierr = VecSet(z,0.);CHKERRQ(ierr); 1848 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1849 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1850 if (pcbddc->ChangeOfBasisMatrix) { 1851 pcbddc->work_change = r; 1852 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1853 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1854 } 1855 PetscFunctionReturn(0); 1856 } 1857 1858 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1859 { 1860 PCBDDCBenignMatMult_ctx ctx; 1861 PetscErrorCode ierr; 1862 PetscBool apply_right,apply_left,reset_x; 1863 1864 PetscFunctionBegin; 1865 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1866 if (transpose) { 1867 apply_right = ctx->apply_left; 1868 apply_left = ctx->apply_right; 1869 } else { 1870 apply_right = ctx->apply_right; 1871 apply_left = ctx->apply_left; 1872 } 1873 reset_x = PETSC_FALSE; 1874 if (apply_right) { 1875 const PetscScalar *ax; 1876 PetscInt nl,i; 1877 1878 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1879 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1880 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1881 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1882 for (i=0;i<ctx->benign_n;i++) { 1883 PetscScalar sum,val; 1884 const PetscInt *idxs; 1885 PetscInt nz,j; 1886 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1887 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1888 sum = 0.; 1889 if (ctx->apply_p0) { 1890 val = ctx->work[idxs[nz-1]]; 1891 for (j=0;j<nz-1;j++) { 1892 sum += ctx->work[idxs[j]]; 1893 ctx->work[idxs[j]] += val; 1894 } 1895 } else { 1896 for (j=0;j<nz-1;j++) { 1897 sum += ctx->work[idxs[j]]; 1898 } 1899 } 1900 ctx->work[idxs[nz-1]] -= sum; 1901 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1902 } 1903 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1904 reset_x = PETSC_TRUE; 1905 } 1906 if (transpose) { 1907 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1908 } else { 1909 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1910 } 1911 if (reset_x) { 1912 ierr = VecResetArray(x);CHKERRQ(ierr); 1913 } 1914 if (apply_left) { 1915 PetscScalar *ay; 1916 PetscInt i; 1917 1918 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1919 for (i=0;i<ctx->benign_n;i++) { 1920 PetscScalar sum,val; 1921 const PetscInt *idxs; 1922 PetscInt nz,j; 1923 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1924 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1925 val = -ay[idxs[nz-1]]; 1926 if (ctx->apply_p0) { 1927 sum = 0.; 1928 for (j=0;j<nz-1;j++) { 1929 sum += ay[idxs[j]]; 1930 ay[idxs[j]] += val; 1931 } 1932 ay[idxs[nz-1]] += sum; 1933 } else { 1934 for (j=0;j<nz-1;j++) { 1935 ay[idxs[j]] += val; 1936 } 1937 ay[idxs[nz-1]] = 0.; 1938 } 1939 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1940 } 1941 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1942 } 1943 PetscFunctionReturn(0); 1944 } 1945 1946 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1947 { 1948 PetscErrorCode ierr; 1949 1950 PetscFunctionBegin; 1951 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1952 PetscFunctionReturn(0); 1953 } 1954 1955 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1956 { 1957 PetscErrorCode ierr; 1958 1959 PetscFunctionBegin; 1960 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1961 PetscFunctionReturn(0); 1962 } 1963 1964 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1965 { 1966 PC_IS *pcis = (PC_IS*)pc->data; 1967 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1968 PCBDDCBenignMatMult_ctx ctx; 1969 PetscErrorCode ierr; 1970 1971 PetscFunctionBegin; 1972 if (!restore) { 1973 Mat A_IB,A_BI; 1974 PetscScalar *work; 1975 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 1976 1977 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 1978 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 1979 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 1980 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 1981 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 1982 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 1983 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 1984 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 1985 ierr = PetscNew(&ctx);CHKERRQ(ierr); 1986 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 1987 ctx->apply_left = PETSC_TRUE; 1988 ctx->apply_right = PETSC_FALSE; 1989 ctx->apply_p0 = PETSC_FALSE; 1990 ctx->benign_n = pcbddc->benign_n; 1991 if (reuse) { 1992 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 1993 ctx->free = PETSC_FALSE; 1994 } else { /* TODO: could be optimized for successive solves */ 1995 ISLocalToGlobalMapping N_to_D; 1996 PetscInt i; 1997 1998 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 1999 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2000 for (i=0;i<pcbddc->benign_n;i++) { 2001 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2002 } 2003 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2004 ctx->free = PETSC_TRUE; 2005 } 2006 ctx->A = pcis->A_IB; 2007 ctx->work = work; 2008 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2009 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2010 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2011 pcis->A_IB = A_IB; 2012 2013 /* A_BI as A_IB^T */ 2014 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2015 pcbddc->benign_original_mat = pcis->A_BI; 2016 pcis->A_BI = A_BI; 2017 } else { 2018 if (!pcbddc->benign_original_mat) { 2019 PetscFunctionReturn(0); 2020 } 2021 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2022 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2023 pcis->A_IB = ctx->A; 2024 ctx->A = NULL; 2025 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2026 pcis->A_BI = pcbddc->benign_original_mat; 2027 pcbddc->benign_original_mat = NULL; 2028 if (ctx->free) { 2029 PetscInt i; 2030 for (i=0;i<ctx->benign_n;i++) { 2031 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2032 } 2033 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2034 } 2035 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2036 ierr = PetscFree(ctx);CHKERRQ(ierr); 2037 } 2038 PetscFunctionReturn(0); 2039 } 2040 2041 /* used just in bddc debug mode */ 2042 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2043 { 2044 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2045 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2046 Mat An; 2047 PetscErrorCode ierr; 2048 2049 PetscFunctionBegin; 2050 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2051 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2052 if (is1) { 2053 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2054 ierr = MatDestroy(&An);CHKERRQ(ierr); 2055 } else { 2056 *B = An; 2057 } 2058 PetscFunctionReturn(0); 2059 } 2060 2061 /* TODO: add reuse flag */ 2062 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2063 { 2064 Mat Bt; 2065 PetscScalar *a,*bdata; 2066 const PetscInt *ii,*ij; 2067 PetscInt m,n,i,nnz,*bii,*bij; 2068 PetscBool flg_row; 2069 PetscErrorCode ierr; 2070 2071 PetscFunctionBegin; 2072 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2073 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2074 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2075 nnz = n; 2076 for (i=0;i<ii[n];i++) { 2077 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2078 } 2079 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2080 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2081 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2082 nnz = 0; 2083 bii[0] = 0; 2084 for (i=0;i<n;i++) { 2085 PetscInt j; 2086 for (j=ii[i];j<ii[i+1];j++) { 2087 PetscScalar entry = a[j]; 2088 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2089 bij[nnz] = ij[j]; 2090 bdata[nnz] = entry; 2091 nnz++; 2092 } 2093 } 2094 bii[i+1] = nnz; 2095 } 2096 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2097 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2098 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2099 { 2100 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2101 b->free_a = PETSC_TRUE; 2102 b->free_ij = PETSC_TRUE; 2103 } 2104 *B = Bt; 2105 PetscFunctionReturn(0); 2106 } 2107 2108 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2109 { 2110 Mat B = NULL; 2111 DM dm; 2112 IS is_dummy,*cc_n; 2113 ISLocalToGlobalMapping l2gmap_dummy; 2114 PCBDDCGraph graph; 2115 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2116 PetscInt i,n; 2117 PetscInt *xadj,*adjncy; 2118 PetscBool isplex = PETSC_FALSE; 2119 PetscErrorCode ierr; 2120 2121 PetscFunctionBegin; 2122 if (ncc) *ncc = 0; 2123 if (cc) *cc = NULL; 2124 if (primalv) *primalv = NULL; 2125 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2126 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2127 if (!dm) { 2128 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2129 } 2130 if (dm) { 2131 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2132 } 2133 if (isplex) { /* this code has been modified from plexpartition.c */ 2134 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2135 PetscInt *adj = NULL; 2136 IS cellNumbering; 2137 const PetscInt *cellNum; 2138 PetscBool useCone, useClosure; 2139 PetscSection section; 2140 PetscSegBuffer adjBuffer; 2141 PetscSF sfPoint; 2142 PetscErrorCode ierr; 2143 2144 PetscFunctionBegin; 2145 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2146 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2147 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2148 /* Build adjacency graph via a section/segbuffer */ 2149 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2150 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2151 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2152 /* Always use FVM adjacency to create partitioner graph */ 2153 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2154 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2155 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2156 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2157 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2158 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2159 for (n = 0, p = pStart; p < pEnd; p++) { 2160 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2161 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2162 adjSize = PETSC_DETERMINE; 2163 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2164 for (a = 0; a < adjSize; ++a) { 2165 const PetscInt point = adj[a]; 2166 if (pStart <= point && point < pEnd) { 2167 PetscInt *PETSC_RESTRICT pBuf; 2168 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2169 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2170 *pBuf = point; 2171 } 2172 } 2173 n++; 2174 } 2175 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2176 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2177 /* Derive CSR graph from section/segbuffer */ 2178 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2179 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2180 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2181 for (idx = 0, p = pStart; p < pEnd; p++) { 2182 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2183 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2184 } 2185 xadj[n] = size; 2186 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2187 /* Clean up */ 2188 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2189 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2190 ierr = PetscFree(adj);CHKERRQ(ierr); 2191 graph->xadj = xadj; 2192 graph->adjncy = adjncy; 2193 } else { 2194 Mat A; 2195 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2196 2197 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2198 if (!A->rmap->N || !A->cmap->N) { 2199 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2200 PetscFunctionReturn(0); 2201 } 2202 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2203 if (!isseqaij && filter) { 2204 PetscBool isseqdense; 2205 2206 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2207 if (!isseqdense) { 2208 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2209 } else { /* TODO: rectangular case and LDA */ 2210 PetscScalar *array; 2211 PetscReal chop=1.e-6; 2212 2213 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2214 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2215 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2216 for (i=0;i<n;i++) { 2217 PetscInt j; 2218 for (j=i+1;j<n;j++) { 2219 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2220 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2221 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2222 } 2223 } 2224 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2225 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2226 } 2227 } else { 2228 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2229 B = A; 2230 } 2231 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2232 2233 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2234 if (filter) { 2235 PetscScalar *data; 2236 PetscInt j,cum; 2237 2238 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2239 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2240 cum = 0; 2241 for (i=0;i<n;i++) { 2242 PetscInt t; 2243 2244 for (j=xadj[i];j<xadj[i+1];j++) { 2245 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2246 continue; 2247 } 2248 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2249 } 2250 t = xadj_filtered[i]; 2251 xadj_filtered[i] = cum; 2252 cum += t; 2253 } 2254 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2255 graph->xadj = xadj_filtered; 2256 graph->adjncy = adjncy_filtered; 2257 } else { 2258 graph->xadj = xadj; 2259 graph->adjncy = adjncy; 2260 } 2261 } 2262 /* compute local connected components using PCBDDCGraph */ 2263 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2264 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2265 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2266 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2267 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2268 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2269 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2270 2271 /* partial clean up */ 2272 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2273 if (B) { 2274 PetscBool flg_row; 2275 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2276 ierr = MatDestroy(&B);CHKERRQ(ierr); 2277 } 2278 if (isplex) { 2279 ierr = PetscFree(xadj);CHKERRQ(ierr); 2280 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2281 } 2282 2283 /* get back data */ 2284 if (isplex) { 2285 if (ncc) *ncc = graph->ncc; 2286 if (cc || primalv) { 2287 Mat A; 2288 PetscBT btv,btvt; 2289 PetscSection subSection; 2290 PetscInt *ids,cum,cump,*cids,*pids; 2291 2292 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2293 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2294 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2295 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2296 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2297 2298 cids[0] = 0; 2299 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2300 PetscInt j; 2301 2302 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2303 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2304 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2305 2306 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2307 for (k = 0; k < 2*size; k += 2) { 2308 PetscInt s, p = closure[k], off, dof, cdof; 2309 2310 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2311 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2312 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2313 for (s = 0; s < dof-cdof; s++) { 2314 if (PetscBTLookupSet(btvt,off+s)) continue; 2315 if (!PetscBTLookup(btv,off+s)) { 2316 ids[cum++] = off+s; 2317 } else { /* cross-vertex */ 2318 pids[cump++] = off+s; 2319 } 2320 } 2321 } 2322 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2323 } 2324 cids[i+1] = cum; 2325 /* mark dofs as already assigned */ 2326 for (j = cids[i]; j < cids[i+1]; j++) { 2327 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2328 } 2329 } 2330 if (cc) { 2331 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2332 for (i = 0; i < graph->ncc; i++) { 2333 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2334 } 2335 *cc = cc_n; 2336 } 2337 if (primalv) { 2338 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2339 } 2340 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2341 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2342 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2343 } 2344 } else { 2345 if (ncc) *ncc = graph->ncc; 2346 if (cc) { 2347 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2348 for (i=0;i<graph->ncc;i++) { 2349 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2350 } 2351 *cc = cc_n; 2352 } 2353 } 2354 /* clean up graph */ 2355 graph->xadj = 0; 2356 graph->adjncy = 0; 2357 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2358 PetscFunctionReturn(0); 2359 } 2360 2361 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2362 { 2363 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2364 PC_IS* pcis = (PC_IS*)(pc->data); 2365 IS dirIS = NULL; 2366 PetscInt i; 2367 PetscErrorCode ierr; 2368 2369 PetscFunctionBegin; 2370 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2371 if (zerodiag) { 2372 Mat A; 2373 Vec vec3_N; 2374 PetscScalar *vals; 2375 const PetscInt *idxs; 2376 PetscInt nz,*count; 2377 2378 /* p0 */ 2379 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2380 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2381 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2382 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2383 for (i=0;i<nz;i++) vals[i] = 1.; 2384 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2385 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2386 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2387 /* v_I */ 2388 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2389 for (i=0;i<nz;i++) vals[i] = 0.; 2390 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2391 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2392 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2393 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2394 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2395 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2396 if (dirIS) { 2397 PetscInt n; 2398 2399 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2400 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2401 for (i=0;i<n;i++) vals[i] = 0.; 2402 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2403 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2404 } 2405 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2406 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2407 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2408 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2409 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2410 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2411 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2412 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2413 ierr = PetscFree(vals);CHKERRQ(ierr); 2414 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2415 2416 /* there should not be any pressure dofs lying on the interface */ 2417 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2418 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2419 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2420 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2421 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2422 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2423 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2424 ierr = PetscFree(count);CHKERRQ(ierr); 2425 } 2426 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2427 2428 /* check PCBDDCBenignGetOrSetP0 */ 2429 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2430 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2431 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2432 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2433 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2434 for (i=0;i<pcbddc->benign_n;i++) { 2435 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2436 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2437 } 2438 PetscFunctionReturn(0); 2439 } 2440 2441 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2442 { 2443 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2444 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2445 PetscInt nz,n; 2446 PetscInt *interior_dofs,n_interior_dofs,nneu; 2447 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2448 PetscErrorCode ierr; 2449 2450 PetscFunctionBegin; 2451 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2452 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2453 for (n=0;n<pcbddc->benign_n;n++) { 2454 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2455 } 2456 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2457 pcbddc->benign_n = 0; 2458 2459 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2460 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2461 Checks if all the pressure dofs in each subdomain have a zero diagonal 2462 If not, a change of basis on pressures is not needed 2463 since the local Schur complements are already SPD 2464 */ 2465 has_null_pressures = PETSC_TRUE; 2466 have_null = PETSC_TRUE; 2467 if (pcbddc->n_ISForDofsLocal) { 2468 IS iP = NULL; 2469 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2470 2471 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2472 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2473 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2474 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2475 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2476 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2477 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2478 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2479 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2480 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2481 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2482 if (iP) { 2483 IS newpressures; 2484 2485 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2486 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2487 pressures = newpressures; 2488 } 2489 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2490 if (!sorted) { 2491 ierr = ISSort(pressures);CHKERRQ(ierr); 2492 } 2493 } else { 2494 pressures = NULL; 2495 } 2496 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2497 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2498 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2499 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2500 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2501 if (!sorted) { 2502 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2503 } 2504 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2505 zerodiag_save = zerodiag; 2506 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2507 if (!nz) { 2508 if (n) have_null = PETSC_FALSE; 2509 has_null_pressures = PETSC_FALSE; 2510 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2511 } 2512 recompute_zerodiag = PETSC_FALSE; 2513 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2514 zerodiag_subs = NULL; 2515 pcbddc->benign_n = 0; 2516 n_interior_dofs = 0; 2517 interior_dofs = NULL; 2518 nneu = 0; 2519 if (pcbddc->NeumannBoundariesLocal) { 2520 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2521 } 2522 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2523 if (checkb) { /* need to compute interior nodes */ 2524 PetscInt n,i,j; 2525 PetscInt n_neigh,*neigh,*n_shared,**shared; 2526 PetscInt *iwork; 2527 2528 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2529 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2530 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2531 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2532 for (i=1;i<n_neigh;i++) 2533 for (j=0;j<n_shared[i];j++) 2534 iwork[shared[i][j]] += 1; 2535 for (i=0;i<n;i++) 2536 if (!iwork[i]) 2537 interior_dofs[n_interior_dofs++] = i; 2538 ierr = PetscFree(iwork);CHKERRQ(ierr); 2539 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2540 } 2541 if (has_null_pressures) { 2542 IS *subs; 2543 PetscInt nsubs,i,j,nl; 2544 const PetscInt *idxs; 2545 PetscScalar *array; 2546 Vec *work; 2547 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2548 2549 subs = pcbddc->local_subs; 2550 nsubs = pcbddc->n_local_subs; 2551 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2552 if (checkb) { 2553 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2554 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2555 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2556 /* work[0] = 1_p */ 2557 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2558 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2559 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2560 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2561 /* work[0] = 1_v */ 2562 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2563 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2564 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2565 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2566 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2567 } 2568 if (nsubs > 1) { 2569 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2570 for (i=0;i<nsubs;i++) { 2571 ISLocalToGlobalMapping l2g; 2572 IS t_zerodiag_subs; 2573 PetscInt nl; 2574 2575 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2576 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2577 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2578 if (nl) { 2579 PetscBool valid = PETSC_TRUE; 2580 2581 if (checkb) { 2582 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2583 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2584 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2585 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2586 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2587 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2588 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2589 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2590 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2591 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2592 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2593 for (j=0;j<n_interior_dofs;j++) { 2594 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2595 valid = PETSC_FALSE; 2596 break; 2597 } 2598 } 2599 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2600 } 2601 if (valid && nneu) { 2602 const PetscInt *idxs; 2603 PetscInt nzb; 2604 2605 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2606 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2607 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2608 if (nzb) valid = PETSC_FALSE; 2609 } 2610 if (valid && pressures) { 2611 IS t_pressure_subs; 2612 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2613 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2614 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2615 } 2616 if (valid) { 2617 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2618 pcbddc->benign_n++; 2619 } else { 2620 recompute_zerodiag = PETSC_TRUE; 2621 } 2622 } 2623 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2624 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2625 } 2626 } else { /* there's just one subdomain (or zero if they have not been detected */ 2627 PetscBool valid = PETSC_TRUE; 2628 2629 if (nneu) valid = PETSC_FALSE; 2630 if (valid && pressures) { 2631 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2632 } 2633 if (valid && checkb) { 2634 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2635 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2636 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2637 for (j=0;j<n_interior_dofs;j++) { 2638 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2639 valid = PETSC_FALSE; 2640 break; 2641 } 2642 } 2643 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2644 } 2645 if (valid) { 2646 pcbddc->benign_n = 1; 2647 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2648 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2649 zerodiag_subs[0] = zerodiag; 2650 } 2651 } 2652 if (checkb) { 2653 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2654 } 2655 } 2656 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2657 2658 if (!pcbddc->benign_n) { 2659 PetscInt n; 2660 2661 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2662 recompute_zerodiag = PETSC_FALSE; 2663 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2664 if (n) { 2665 has_null_pressures = PETSC_FALSE; 2666 have_null = PETSC_FALSE; 2667 } 2668 } 2669 2670 /* final check for null pressures */ 2671 if (zerodiag && pressures) { 2672 PetscInt nz,np; 2673 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2674 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2675 if (nz != np) have_null = PETSC_FALSE; 2676 } 2677 2678 if (recompute_zerodiag) { 2679 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2680 if (pcbddc->benign_n == 1) { 2681 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2682 zerodiag = zerodiag_subs[0]; 2683 } else { 2684 PetscInt i,nzn,*new_idxs; 2685 2686 nzn = 0; 2687 for (i=0;i<pcbddc->benign_n;i++) { 2688 PetscInt ns; 2689 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2690 nzn += ns; 2691 } 2692 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2693 nzn = 0; 2694 for (i=0;i<pcbddc->benign_n;i++) { 2695 PetscInt ns,*idxs; 2696 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2697 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2698 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2699 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2700 nzn += ns; 2701 } 2702 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2703 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2704 } 2705 have_null = PETSC_FALSE; 2706 } 2707 2708 /* Prepare matrix to compute no-net-flux */ 2709 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2710 Mat A,loc_divudotp; 2711 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2712 IS row,col,isused = NULL; 2713 PetscInt M,N,n,st,n_isused; 2714 2715 if (pressures) { 2716 isused = pressures; 2717 } else { 2718 isused = zerodiag_save; 2719 } 2720 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2721 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2722 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2723 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2724 n_isused = 0; 2725 if (isused) { 2726 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2727 } 2728 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2729 st = st-n_isused; 2730 if (n) { 2731 const PetscInt *gidxs; 2732 2733 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2734 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2735 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2736 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2737 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2738 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2739 } else { 2740 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2741 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2742 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2743 } 2744 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2745 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2746 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2747 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2748 ierr = ISDestroy(&row);CHKERRQ(ierr); 2749 ierr = ISDestroy(&col);CHKERRQ(ierr); 2750 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2751 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2752 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2753 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2754 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2755 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2756 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2757 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2758 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2759 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2760 } 2761 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2762 2763 /* change of basis and p0 dofs */ 2764 if (has_null_pressures) { 2765 IS zerodiagc; 2766 const PetscInt *idxs,*idxsc; 2767 PetscInt i,s,*nnz; 2768 2769 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2770 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2771 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2772 /* local change of basis for pressures */ 2773 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2774 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2775 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2776 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2777 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2778 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2779 for (i=0;i<pcbddc->benign_n;i++) { 2780 PetscInt nzs,j; 2781 2782 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2783 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2784 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2785 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2786 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2787 } 2788 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2789 ierr = PetscFree(nnz);CHKERRQ(ierr); 2790 /* set identity on velocities */ 2791 for (i=0;i<n-nz;i++) { 2792 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2793 } 2794 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2795 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2796 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2797 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2798 /* set change on pressures */ 2799 for (s=0;s<pcbddc->benign_n;s++) { 2800 PetscScalar *array; 2801 PetscInt nzs; 2802 2803 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2804 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2805 for (i=0;i<nzs-1;i++) { 2806 PetscScalar vals[2]; 2807 PetscInt cols[2]; 2808 2809 cols[0] = idxs[i]; 2810 cols[1] = idxs[nzs-1]; 2811 vals[0] = 1.; 2812 vals[1] = 1.; 2813 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2814 } 2815 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2816 for (i=0;i<nzs-1;i++) array[i] = -1.; 2817 array[nzs-1] = 1.; 2818 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2819 /* store local idxs for p0 */ 2820 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2821 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2822 ierr = PetscFree(array);CHKERRQ(ierr); 2823 } 2824 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2825 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2826 /* project if needed */ 2827 if (pcbddc->benign_change_explicit) { 2828 Mat M; 2829 2830 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2831 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2832 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2833 ierr = MatDestroy(&M);CHKERRQ(ierr); 2834 } 2835 /* store global idxs for p0 */ 2836 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2837 } 2838 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2839 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2840 2841 /* determines if the coarse solver will be singular or not */ 2842 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2843 /* determines if the problem has subdomains with 0 pressure block */ 2844 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2845 *zerodiaglocal = zerodiag; 2846 PetscFunctionReturn(0); 2847 } 2848 2849 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2850 { 2851 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2852 PetscScalar *array; 2853 PetscErrorCode ierr; 2854 2855 PetscFunctionBegin; 2856 if (!pcbddc->benign_sf) { 2857 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2858 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2859 } 2860 if (get) { 2861 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2862 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2863 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2864 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2865 } else { 2866 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2867 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2868 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2869 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2870 } 2871 PetscFunctionReturn(0); 2872 } 2873 2874 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2875 { 2876 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2877 PetscErrorCode ierr; 2878 2879 PetscFunctionBegin; 2880 /* TODO: add error checking 2881 - avoid nested pop (or push) calls. 2882 - cannot push before pop. 2883 - cannot call this if pcbddc->local_mat is NULL 2884 */ 2885 if (!pcbddc->benign_n) { 2886 PetscFunctionReturn(0); 2887 } 2888 if (pop) { 2889 if (pcbddc->benign_change_explicit) { 2890 IS is_p0; 2891 MatReuse reuse; 2892 2893 /* extract B_0 */ 2894 reuse = MAT_INITIAL_MATRIX; 2895 if (pcbddc->benign_B0) { 2896 reuse = MAT_REUSE_MATRIX; 2897 } 2898 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2899 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2900 /* remove rows and cols from local problem */ 2901 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2902 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2903 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2904 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2905 } else { 2906 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2907 PetscScalar *vals; 2908 PetscInt i,n,*idxs_ins; 2909 2910 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2911 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2912 if (!pcbddc->benign_B0) { 2913 PetscInt *nnz; 2914 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2915 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2916 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2917 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2918 for (i=0;i<pcbddc->benign_n;i++) { 2919 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2920 nnz[i] = n - nnz[i]; 2921 } 2922 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2923 ierr = PetscFree(nnz);CHKERRQ(ierr); 2924 } 2925 2926 for (i=0;i<pcbddc->benign_n;i++) { 2927 PetscScalar *array; 2928 PetscInt *idxs,j,nz,cum; 2929 2930 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2931 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2932 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2933 for (j=0;j<nz;j++) vals[j] = 1.; 2934 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2935 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2936 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2937 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2938 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2939 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2940 cum = 0; 2941 for (j=0;j<n;j++) { 2942 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2943 vals[cum] = array[j]; 2944 idxs_ins[cum] = j; 2945 cum++; 2946 } 2947 } 2948 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2949 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2950 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2951 } 2952 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2953 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2954 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2955 } 2956 } else { /* push */ 2957 if (pcbddc->benign_change_explicit) { 2958 PetscInt i; 2959 2960 for (i=0;i<pcbddc->benign_n;i++) { 2961 PetscScalar *B0_vals; 2962 PetscInt *B0_cols,B0_ncol; 2963 2964 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2965 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2966 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2967 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2968 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2969 } 2970 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2971 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2972 } else { 2973 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 2974 } 2975 } 2976 PetscFunctionReturn(0); 2977 } 2978 2979 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 2980 { 2981 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2982 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 2983 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 2984 PetscBLASInt *B_iwork,*B_ifail; 2985 PetscScalar *work,lwork; 2986 PetscScalar *St,*S,*eigv; 2987 PetscScalar *Sarray,*Starray; 2988 PetscReal *eigs,thresh; 2989 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 2990 PetscBool allocated_S_St; 2991 #if defined(PETSC_USE_COMPLEX) 2992 PetscReal *rwork; 2993 #endif 2994 PetscErrorCode ierr; 2995 2996 PetscFunctionBegin; 2997 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 2998 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 2999 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3000 3001 if (pcbddc->dbg_flag) { 3002 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3003 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3004 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3005 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3006 } 3007 3008 if (pcbddc->dbg_flag) { 3009 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3010 } 3011 3012 /* max size of subsets */ 3013 mss = 0; 3014 for (i=0;i<sub_schurs->n_subs;i++) { 3015 PetscInt subset_size; 3016 3017 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3018 mss = PetscMax(mss,subset_size); 3019 } 3020 3021 /* min/max and threshold */ 3022 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3023 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3024 nmax = PetscMax(nmin,nmax); 3025 allocated_S_St = PETSC_FALSE; 3026 if (nmin) { 3027 allocated_S_St = PETSC_TRUE; 3028 } 3029 3030 /* allocate lapack workspace */ 3031 cum = cum2 = 0; 3032 maxneigs = 0; 3033 for (i=0;i<sub_schurs->n_subs;i++) { 3034 PetscInt n,subset_size; 3035 3036 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3037 n = PetscMin(subset_size,nmax); 3038 cum += subset_size; 3039 cum2 += subset_size*n; 3040 maxneigs = PetscMax(maxneigs,n); 3041 } 3042 if (mss) { 3043 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3044 PetscBLASInt B_itype = 1; 3045 PetscBLASInt B_N = mss; 3046 PetscReal zero = 0.0; 3047 PetscReal eps = 0.0; /* dlamch? */ 3048 3049 B_lwork = -1; 3050 S = NULL; 3051 St = NULL; 3052 eigs = NULL; 3053 eigv = NULL; 3054 B_iwork = NULL; 3055 B_ifail = NULL; 3056 #if defined(PETSC_USE_COMPLEX) 3057 rwork = NULL; 3058 #endif 3059 thresh = 1.0; 3060 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3061 #if defined(PETSC_USE_COMPLEX) 3062 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3063 #else 3064 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3065 #endif 3066 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3067 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3068 } else { 3069 /* TODO */ 3070 } 3071 } else { 3072 lwork = 0; 3073 } 3074 3075 nv = 0; 3076 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3077 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3078 } 3079 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3080 if (allocated_S_St) { 3081 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3082 } 3083 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3084 #if defined(PETSC_USE_COMPLEX) 3085 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3086 #endif 3087 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3088 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3089 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3090 nv+cum,&pcbddc->adaptive_constraints_idxs, 3091 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3092 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3093 3094 maxneigs = 0; 3095 cum = cumarray = 0; 3096 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3097 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3098 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3099 const PetscInt *idxs; 3100 3101 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3102 for (cum=0;cum<nv;cum++) { 3103 pcbddc->adaptive_constraints_n[cum] = 1; 3104 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3105 pcbddc->adaptive_constraints_data[cum] = 1.0; 3106 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3107 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3108 } 3109 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3110 } 3111 3112 if (mss) { /* multilevel */ 3113 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3114 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3115 } 3116 3117 thresh = pcbddc->adaptive_threshold; 3118 for (i=0;i<sub_schurs->n_subs;i++) { 3119 const PetscInt *idxs; 3120 PetscReal upper,lower; 3121 PetscInt j,subset_size,eigs_start = 0; 3122 PetscBLASInt B_N; 3123 PetscBool same_data = PETSC_FALSE; 3124 3125 if (pcbddc->use_deluxe_scaling) { 3126 upper = PETSC_MAX_REAL; 3127 lower = thresh; 3128 } else { 3129 upper = 1./thresh; 3130 lower = 0.; 3131 } 3132 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3133 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3134 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3135 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3136 if (sub_schurs->is_hermitian) { 3137 PetscInt j,k; 3138 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3139 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3140 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3141 } 3142 for (j=0;j<subset_size;j++) { 3143 for (k=j;k<subset_size;k++) { 3144 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3145 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3146 } 3147 } 3148 } else { 3149 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3150 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3151 } 3152 } else { 3153 S = Sarray + cumarray; 3154 St = Starray + cumarray; 3155 } 3156 /* see if we can save some work */ 3157 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3158 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3159 } 3160 3161 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3162 B_neigs = 0; 3163 } else { 3164 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3165 PetscBLASInt B_itype = 1; 3166 PetscBLASInt B_IL, B_IU; 3167 PetscReal eps = -1.0; /* dlamch? */ 3168 PetscInt nmin_s; 3169 PetscBool compute_range = PETSC_FALSE; 3170 3171 if (pcbddc->dbg_flag) { 3172 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3173 } 3174 3175 compute_range = PETSC_FALSE; 3176 if (thresh > 1.+PETSC_SMALL && !same_data) { 3177 compute_range = PETSC_TRUE; 3178 } 3179 3180 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3181 if (compute_range) { 3182 3183 /* ask for eigenvalues larger than thresh */ 3184 #if defined(PETSC_USE_COMPLEX) 3185 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3186 #else 3187 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3188 #endif 3189 } else if (!same_data) { 3190 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3191 B_IL = 1; 3192 #if defined(PETSC_USE_COMPLEX) 3193 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3194 #else 3195 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3196 #endif 3197 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3198 PetscInt k; 3199 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3200 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3201 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3202 nmin = nmax; 3203 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3204 for (k=0;k<nmax;k++) { 3205 eigs[k] = 1./PETSC_SMALL; 3206 eigv[k*(subset_size+1)] = 1.0; 3207 } 3208 } 3209 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3210 if (B_ierr) { 3211 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3212 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3213 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3214 } 3215 3216 if (B_neigs > nmax) { 3217 if (pcbddc->dbg_flag) { 3218 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3219 } 3220 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3221 B_neigs = nmax; 3222 } 3223 3224 nmin_s = PetscMin(nmin,B_N); 3225 if (B_neigs < nmin_s) { 3226 PetscBLASInt B_neigs2; 3227 3228 if (pcbddc->use_deluxe_scaling) { 3229 B_IL = B_N - nmin_s + 1; 3230 B_IU = B_N - B_neigs; 3231 } else { 3232 B_IL = B_neigs + 1; 3233 B_IU = nmin_s; 3234 } 3235 if (pcbddc->dbg_flag) { 3236 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3237 } 3238 if (sub_schurs->is_hermitian) { 3239 PetscInt j,k; 3240 for (j=0;j<subset_size;j++) { 3241 for (k=j;k<subset_size;k++) { 3242 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3243 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3244 } 3245 } 3246 } else { 3247 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3248 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3249 } 3250 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3251 #if defined(PETSC_USE_COMPLEX) 3252 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3253 #else 3254 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3255 #endif 3256 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3257 B_neigs += B_neigs2; 3258 } 3259 if (B_ierr) { 3260 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3261 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3262 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3263 } 3264 if (pcbddc->dbg_flag) { 3265 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3266 for (j=0;j<B_neigs;j++) { 3267 if (eigs[j] == 0.0) { 3268 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3269 } else { 3270 if (pcbddc->use_deluxe_scaling) { 3271 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3272 } else { 3273 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3274 } 3275 } 3276 } 3277 } 3278 } else { 3279 /* TODO */ 3280 } 3281 } 3282 /* change the basis back to the original one */ 3283 if (sub_schurs->change) { 3284 Mat change,phi,phit; 3285 3286 if (pcbddc->dbg_flag > 2) { 3287 PetscInt ii; 3288 for (ii=0;ii<B_neigs;ii++) { 3289 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3290 for (j=0;j<B_N;j++) { 3291 #if defined(PETSC_USE_COMPLEX) 3292 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3293 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3294 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3295 #else 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3297 #endif 3298 } 3299 } 3300 } 3301 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3302 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3303 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3304 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3305 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3306 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3307 } 3308 maxneigs = PetscMax(B_neigs,maxneigs); 3309 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3310 if (B_neigs) { 3311 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3312 3313 if (pcbddc->dbg_flag > 1) { 3314 PetscInt ii; 3315 for (ii=0;ii<B_neigs;ii++) { 3316 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3317 for (j=0;j<B_N;j++) { 3318 #if defined(PETSC_USE_COMPLEX) 3319 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3320 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3321 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3322 #else 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3324 #endif 3325 } 3326 } 3327 } 3328 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3329 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3330 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3331 cum++; 3332 } 3333 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3334 /* shift for next computation */ 3335 cumarray += subset_size*subset_size; 3336 } 3337 if (pcbddc->dbg_flag) { 3338 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3339 } 3340 3341 if (mss) { 3342 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3343 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3344 /* destroy matrices (junk) */ 3345 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3346 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3347 } 3348 if (allocated_S_St) { 3349 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3350 } 3351 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3352 #if defined(PETSC_USE_COMPLEX) 3353 ierr = PetscFree(rwork);CHKERRQ(ierr); 3354 #endif 3355 if (pcbddc->dbg_flag) { 3356 PetscInt maxneigs_r; 3357 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3358 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3359 } 3360 PetscFunctionReturn(0); 3361 } 3362 3363 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3364 { 3365 PetscScalar *coarse_submat_vals; 3366 PetscErrorCode ierr; 3367 3368 PetscFunctionBegin; 3369 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3370 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3371 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3372 3373 /* Setup local neumann solver ksp_R */ 3374 /* PCBDDCSetUpLocalScatters should be called first! */ 3375 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3376 3377 /* 3378 Setup local correction and local part of coarse basis. 3379 Gives back the dense local part of the coarse matrix in column major ordering 3380 */ 3381 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3382 3383 /* Compute total number of coarse nodes and setup coarse solver */ 3384 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3385 3386 /* free */ 3387 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3388 PetscFunctionReturn(0); 3389 } 3390 3391 PetscErrorCode PCBDDCResetCustomization(PC pc) 3392 { 3393 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3398 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3399 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3400 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3401 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3402 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3403 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3404 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3405 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3406 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3407 PetscFunctionReturn(0); 3408 } 3409 3410 PetscErrorCode PCBDDCResetTopography(PC pc) 3411 { 3412 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3413 PetscInt i; 3414 PetscErrorCode ierr; 3415 3416 PetscFunctionBegin; 3417 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3418 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3419 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3420 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3421 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3422 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3423 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3424 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3425 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3426 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3427 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3428 for (i=0;i<pcbddc->n_local_subs;i++) { 3429 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3430 } 3431 pcbddc->n_local_subs = 0; 3432 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3433 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3434 pcbddc->graphanalyzed = PETSC_FALSE; 3435 pcbddc->recompute_topography = PETSC_TRUE; 3436 PetscFunctionReturn(0); 3437 } 3438 3439 PetscErrorCode PCBDDCResetSolvers(PC pc) 3440 { 3441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3446 if (pcbddc->coarse_phi_B) { 3447 PetscScalar *array; 3448 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3449 ierr = PetscFree(array);CHKERRQ(ierr); 3450 } 3451 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3452 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3454 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3455 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3456 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3457 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3458 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3459 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3460 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3461 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3462 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3463 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3464 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3465 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3466 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3467 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3468 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3469 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3470 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3471 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3472 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3473 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3474 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3475 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3476 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3477 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3478 if (pcbddc->benign_zerodiag_subs) { 3479 PetscInt i; 3480 for (i=0;i<pcbddc->benign_n;i++) { 3481 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3482 } 3483 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3484 } 3485 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3486 PetscFunctionReturn(0); 3487 } 3488 3489 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3490 { 3491 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3492 PC_IS *pcis = (PC_IS*)pc->data; 3493 VecType impVecType; 3494 PetscInt n_constraints,n_R,old_size; 3495 PetscErrorCode ierr; 3496 3497 PetscFunctionBegin; 3498 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3499 n_R = pcis->n - pcbddc->n_vertices; 3500 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3501 /* local work vectors (try to avoid unneeded work)*/ 3502 /* R nodes */ 3503 old_size = -1; 3504 if (pcbddc->vec1_R) { 3505 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3506 } 3507 if (n_R != old_size) { 3508 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3509 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3510 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3511 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3512 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3513 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3514 } 3515 /* local primal dofs */ 3516 old_size = -1; 3517 if (pcbddc->vec1_P) { 3518 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3519 } 3520 if (pcbddc->local_primal_size != old_size) { 3521 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3522 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3523 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3524 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3525 } 3526 /* local explicit constraints */ 3527 old_size = -1; 3528 if (pcbddc->vec1_C) { 3529 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3530 } 3531 if (n_constraints && n_constraints != old_size) { 3532 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3533 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3534 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3535 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3536 } 3537 PetscFunctionReturn(0); 3538 } 3539 3540 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3541 { 3542 PetscErrorCode ierr; 3543 /* pointers to pcis and pcbddc */ 3544 PC_IS* pcis = (PC_IS*)pc->data; 3545 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3546 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3547 /* submatrices of local problem */ 3548 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3549 /* submatrices of local coarse problem */ 3550 Mat S_VV,S_CV,S_VC,S_CC; 3551 /* working matrices */ 3552 Mat C_CR; 3553 /* additional working stuff */ 3554 PC pc_R; 3555 Mat F,Brhs = NULL; 3556 Vec dummy_vec; 3557 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3558 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3559 PetscScalar *work; 3560 PetscInt *idx_V_B; 3561 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3562 PetscInt i,n_R,n_D,n_B; 3563 3564 /* some shortcuts to scalars */ 3565 PetscScalar one=1.0,m_one=-1.0; 3566 3567 PetscFunctionBegin; 3568 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3569 3570 /* Set Non-overlapping dimensions */ 3571 n_vertices = pcbddc->n_vertices; 3572 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3573 n_B = pcis->n_B; 3574 n_D = pcis->n - n_B; 3575 n_R = pcis->n - n_vertices; 3576 3577 /* vertices in boundary numbering */ 3578 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3579 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3580 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3581 3582 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3583 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3584 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3585 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3586 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3587 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3588 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3589 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3590 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3591 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3592 3593 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3594 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3595 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3596 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3597 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3598 lda_rhs = n_R; 3599 need_benign_correction = PETSC_FALSE; 3600 if (isLU || isILU || isCHOL) { 3601 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3602 } else if (sub_schurs && sub_schurs->reuse_solver) { 3603 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3604 MatFactorType type; 3605 3606 F = reuse_solver->F; 3607 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3608 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3609 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3610 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3611 } else { 3612 F = NULL; 3613 } 3614 3615 /* determine if we can use a sparse right-hand side */ 3616 sparserhs = PETSC_FALSE; 3617 if (F) { 3618 MatSolverType solver; 3619 3620 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3621 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3622 } 3623 3624 /* allocate workspace */ 3625 n = 0; 3626 if (n_constraints) { 3627 n += lda_rhs*n_constraints; 3628 } 3629 if (n_vertices) { 3630 n = PetscMax(2*lda_rhs*n_vertices,n); 3631 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3632 } 3633 if (!pcbddc->symmetric_primal) { 3634 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3635 } 3636 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3637 3638 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3639 dummy_vec = NULL; 3640 if (need_benign_correction && lda_rhs != n_R && F) { 3641 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3642 } 3643 3644 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3645 if (n_constraints) { 3646 Mat M3,C_B; 3647 IS is_aux; 3648 PetscScalar *array,*array2; 3649 3650 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3651 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3652 3653 /* Extract constraints on R nodes: C_{CR} */ 3654 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3655 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3656 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3657 3658 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3659 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3660 if (!sparserhs) { 3661 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3662 for (i=0;i<n_constraints;i++) { 3663 const PetscScalar *row_cmat_values; 3664 const PetscInt *row_cmat_indices; 3665 PetscInt size_of_constraint,j; 3666 3667 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3668 for (j=0;j<size_of_constraint;j++) { 3669 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3670 } 3671 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3672 } 3673 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3674 } else { 3675 Mat tC_CR; 3676 3677 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3678 if (lda_rhs != n_R) { 3679 PetscScalar *aa; 3680 PetscInt r,*ii,*jj; 3681 PetscBool done; 3682 3683 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3684 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3685 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3686 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3687 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3688 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3689 } else { 3690 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3691 tC_CR = C_CR; 3692 } 3693 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3694 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3695 } 3696 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3697 if (F) { 3698 if (need_benign_correction) { 3699 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3700 3701 /* rhs is already zero on interior dofs, no need to change the rhs */ 3702 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3703 } 3704 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3705 if (need_benign_correction) { 3706 PetscScalar *marr; 3707 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3708 3709 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3710 if (lda_rhs != n_R) { 3711 for (i=0;i<n_constraints;i++) { 3712 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3713 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3714 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3715 } 3716 } else { 3717 for (i=0;i<n_constraints;i++) { 3718 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3719 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3720 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3721 } 3722 } 3723 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3724 } 3725 } else { 3726 PetscScalar *marr; 3727 3728 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3729 for (i=0;i<n_constraints;i++) { 3730 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3731 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3732 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3733 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3734 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3735 } 3736 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3737 } 3738 if (sparserhs) { 3739 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3740 } 3741 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3742 if (!pcbddc->switch_static) { 3743 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3744 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3745 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3746 for (i=0;i<n_constraints;i++) { 3747 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3748 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3749 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3750 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3751 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3752 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3753 } 3754 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3755 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3756 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3757 } else { 3758 if (lda_rhs != n_R) { 3759 IS dummy; 3760 3761 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3762 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3763 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3764 } else { 3765 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3766 pcbddc->local_auxmat2 = local_auxmat2_R; 3767 } 3768 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3769 } 3770 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3771 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3772 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3773 if (isCHOL) { 3774 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3775 } else { 3776 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3777 } 3778 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3779 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3780 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3781 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3782 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3783 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3784 } 3785 3786 /* Get submatrices from subdomain matrix */ 3787 if (n_vertices) { 3788 IS is_aux; 3789 PetscBool isseqaij; 3790 3791 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3792 IS tis; 3793 3794 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3795 ierr = ISSort(tis);CHKERRQ(ierr); 3796 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3797 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3798 } else { 3799 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3800 } 3801 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3802 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3803 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3804 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3805 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3806 } 3807 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3808 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3809 } 3810 3811 /* Matrix of coarse basis functions (local) */ 3812 if (pcbddc->coarse_phi_B) { 3813 PetscInt on_B,on_primal,on_D=n_D; 3814 if (pcbddc->coarse_phi_D) { 3815 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3816 } 3817 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3818 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3819 PetscScalar *marray; 3820 3821 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3822 ierr = PetscFree(marray);CHKERRQ(ierr); 3823 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3824 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3825 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3826 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3827 } 3828 } 3829 3830 if (!pcbddc->coarse_phi_B) { 3831 PetscScalar *marr; 3832 3833 /* memory size */ 3834 n = n_B*pcbddc->local_primal_size; 3835 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3836 if (!pcbddc->symmetric_primal) n *= 2; 3837 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3838 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3839 marr += n_B*pcbddc->local_primal_size; 3840 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3841 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3842 marr += n_D*pcbddc->local_primal_size; 3843 } 3844 if (!pcbddc->symmetric_primal) { 3845 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3846 marr += n_B*pcbddc->local_primal_size; 3847 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3848 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3849 } 3850 } else { 3851 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3852 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3853 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3854 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3855 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3856 } 3857 } 3858 } 3859 3860 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3861 p0_lidx_I = NULL; 3862 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3863 const PetscInt *idxs; 3864 3865 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3866 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3867 for (i=0;i<pcbddc->benign_n;i++) { 3868 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3869 } 3870 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3871 } 3872 3873 /* vertices */ 3874 if (n_vertices) { 3875 PetscBool restoreavr = PETSC_FALSE; 3876 3877 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3878 3879 if (n_R) { 3880 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3881 PetscBLASInt B_N,B_one = 1; 3882 PetscScalar *x,*y; 3883 3884 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3885 if (need_benign_correction) { 3886 ISLocalToGlobalMapping RtoN; 3887 IS is_p0; 3888 PetscInt *idxs_p0,n; 3889 3890 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3891 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3892 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3893 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3894 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3895 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3896 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3897 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3898 } 3899 3900 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3901 if (!sparserhs || need_benign_correction) { 3902 if (lda_rhs == n_R) { 3903 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3904 } else { 3905 PetscScalar *av,*array; 3906 const PetscInt *xadj,*adjncy; 3907 PetscInt n; 3908 PetscBool flg_row; 3909 3910 array = work+lda_rhs*n_vertices; 3911 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3912 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3913 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3914 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3915 for (i=0;i<n;i++) { 3916 PetscInt j; 3917 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3918 } 3919 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3920 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3921 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3922 } 3923 if (need_benign_correction) { 3924 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3925 PetscScalar *marr; 3926 3927 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3928 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3929 3930 | 0 0 0 | (V) 3931 L = | 0 0 -1 | (P-p0) 3932 | 0 0 -1 | (p0) 3933 3934 */ 3935 for (i=0;i<reuse_solver->benign_n;i++) { 3936 const PetscScalar *vals; 3937 const PetscInt *idxs,*idxs_zero; 3938 PetscInt n,j,nz; 3939 3940 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3941 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3942 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3943 for (j=0;j<n;j++) { 3944 PetscScalar val = vals[j]; 3945 PetscInt k,col = idxs[j]; 3946 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3947 } 3948 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3949 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3950 } 3951 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3952 } 3953 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3954 Brhs = A_RV; 3955 } else { 3956 Mat tA_RVT,A_RVT; 3957 3958 if (!pcbddc->symmetric_primal) { 3959 /* A_RV already scaled by -1 */ 3960 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3961 } else { 3962 restoreavr = PETSC_TRUE; 3963 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3964 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3965 A_RVT = A_VR; 3966 } 3967 if (lda_rhs != n_R) { 3968 PetscScalar *aa; 3969 PetscInt r,*ii,*jj; 3970 PetscBool done; 3971 3972 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3973 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3974 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 3975 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 3976 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3977 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3978 } else { 3979 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 3980 tA_RVT = A_RVT; 3981 } 3982 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 3983 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 3984 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 3985 } 3986 if (F) { 3987 /* need to correct the rhs */ 3988 if (need_benign_correction) { 3989 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3990 PetscScalar *marr; 3991 3992 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 3993 if (lda_rhs != n_R) { 3994 for (i=0;i<n_vertices;i++) { 3995 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3996 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3997 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3998 } 3999 } else { 4000 for (i=0;i<n_vertices;i++) { 4001 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4002 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4003 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4004 } 4005 } 4006 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4007 } 4008 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4009 if (restoreavr) { 4010 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4011 } 4012 /* need to correct the solution */ 4013 if (need_benign_correction) { 4014 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4015 PetscScalar *marr; 4016 4017 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4018 if (lda_rhs != n_R) { 4019 for (i=0;i<n_vertices;i++) { 4020 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4021 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4022 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4023 } 4024 } else { 4025 for (i=0;i<n_vertices;i++) { 4026 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4027 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4028 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4029 } 4030 } 4031 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4032 } 4033 } else { 4034 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4035 for (i=0;i<n_vertices;i++) { 4036 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4037 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4038 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4039 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4040 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4041 } 4042 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4043 } 4044 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4045 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4046 /* S_VV and S_CV */ 4047 if (n_constraints) { 4048 Mat B; 4049 4050 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4051 for (i=0;i<n_vertices;i++) { 4052 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4053 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4054 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4055 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4056 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4057 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4058 } 4059 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4060 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4061 ierr = MatDestroy(&B);CHKERRQ(ierr); 4062 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4063 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4064 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4065 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4066 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4067 ierr = MatDestroy(&B);CHKERRQ(ierr); 4068 } 4069 if (lda_rhs != n_R) { 4070 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4071 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4072 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4073 } 4074 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4075 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4076 if (need_benign_correction) { 4077 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4078 PetscScalar *marr,*sums; 4079 4080 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4081 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4082 for (i=0;i<reuse_solver->benign_n;i++) { 4083 const PetscScalar *vals; 4084 const PetscInt *idxs,*idxs_zero; 4085 PetscInt n,j,nz; 4086 4087 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4088 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4089 for (j=0;j<n_vertices;j++) { 4090 PetscInt k; 4091 sums[j] = 0.; 4092 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4093 } 4094 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4095 for (j=0;j<n;j++) { 4096 PetscScalar val = vals[j]; 4097 PetscInt k; 4098 for (k=0;k<n_vertices;k++) { 4099 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4100 } 4101 } 4102 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4103 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4104 } 4105 ierr = PetscFree(sums);CHKERRQ(ierr); 4106 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4107 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4108 } 4109 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4110 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4111 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4112 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4113 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4114 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4115 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4116 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4117 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4118 } else { 4119 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4120 } 4121 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4122 4123 /* coarse basis functions */ 4124 for (i=0;i<n_vertices;i++) { 4125 PetscScalar *y; 4126 4127 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4128 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4129 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4130 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4131 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4132 y[n_B*i+idx_V_B[i]] = 1.0; 4133 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4134 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4135 4136 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4137 PetscInt j; 4138 4139 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4140 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4141 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4142 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4143 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4144 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4145 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4146 } 4147 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4148 } 4149 /* if n_R == 0 the object is not destroyed */ 4150 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4151 } 4152 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4153 4154 if (n_constraints) { 4155 Mat B; 4156 4157 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4158 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4159 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4160 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4161 if (n_vertices) { 4162 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4163 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4164 } else { 4165 Mat S_VCt; 4166 4167 if (lda_rhs != n_R) { 4168 ierr = MatDestroy(&B);CHKERRQ(ierr); 4169 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4170 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4171 } 4172 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4173 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4174 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4175 } 4176 } 4177 ierr = MatDestroy(&B);CHKERRQ(ierr); 4178 /* coarse basis functions */ 4179 for (i=0;i<n_constraints;i++) { 4180 PetscScalar *y; 4181 4182 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4183 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4184 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4185 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4186 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4187 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4188 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4189 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4190 PetscInt j; 4191 4192 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4193 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4194 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4195 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4196 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4197 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4198 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4199 } 4200 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4201 } 4202 } 4203 if (n_constraints) { 4204 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4205 } 4206 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4207 4208 /* coarse matrix entries relative to B_0 */ 4209 if (pcbddc->benign_n) { 4210 Mat B0_B,B0_BPHI; 4211 IS is_dummy; 4212 PetscScalar *data; 4213 PetscInt j; 4214 4215 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4216 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4217 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4218 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4219 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4220 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4221 for (j=0;j<pcbddc->benign_n;j++) { 4222 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4223 for (i=0;i<pcbddc->local_primal_size;i++) { 4224 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4225 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4226 } 4227 } 4228 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4229 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4230 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4231 } 4232 4233 /* compute other basis functions for non-symmetric problems */ 4234 if (!pcbddc->symmetric_primal) { 4235 Mat B_V=NULL,B_C=NULL; 4236 PetscScalar *marray; 4237 4238 if (n_constraints) { 4239 Mat S_CCT,C_CRT; 4240 4241 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4242 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4243 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4244 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4245 if (n_vertices) { 4246 Mat S_VCT; 4247 4248 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4249 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4250 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4251 } 4252 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4253 } else { 4254 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4255 } 4256 if (n_vertices && n_R) { 4257 PetscScalar *av,*marray; 4258 const PetscInt *xadj,*adjncy; 4259 PetscInt n; 4260 PetscBool flg_row; 4261 4262 /* B_V = B_V - A_VR^T */ 4263 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4264 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4265 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4266 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4267 for (i=0;i<n;i++) { 4268 PetscInt j; 4269 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4270 } 4271 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4272 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4273 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4274 } 4275 4276 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4277 if (n_vertices) { 4278 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4279 for (i=0;i<n_vertices;i++) { 4280 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4281 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4282 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4283 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4284 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4285 } 4286 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4287 } 4288 if (B_C) { 4289 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4290 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4291 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4292 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4293 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4294 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4295 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4296 } 4297 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4298 } 4299 /* coarse basis functions */ 4300 for (i=0;i<pcbddc->local_primal_size;i++) { 4301 PetscScalar *y; 4302 4303 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4304 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4305 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4306 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4307 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4308 if (i<n_vertices) { 4309 y[n_B*i+idx_V_B[i]] = 1.0; 4310 } 4311 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4312 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4313 4314 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4315 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4316 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4317 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4318 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4319 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4320 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4321 } 4322 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4323 } 4324 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4325 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4326 } 4327 4328 /* free memory */ 4329 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4330 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4331 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4332 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4333 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4334 ierr = PetscFree(work);CHKERRQ(ierr); 4335 if (n_vertices) { 4336 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4337 } 4338 if (n_constraints) { 4339 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4340 } 4341 /* Checking coarse_sub_mat and coarse basis functios */ 4342 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4343 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4344 if (pcbddc->dbg_flag) { 4345 Mat coarse_sub_mat; 4346 Mat AUXMAT,TM1,TM2,TM3,TM4; 4347 Mat coarse_phi_D,coarse_phi_B; 4348 Mat coarse_psi_D,coarse_psi_B; 4349 Mat A_II,A_BB,A_IB,A_BI; 4350 Mat C_B,CPHI; 4351 IS is_dummy; 4352 Vec mones; 4353 MatType checkmattype=MATSEQAIJ; 4354 PetscReal real_value; 4355 4356 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4357 Mat A; 4358 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4359 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4360 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4361 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4362 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4363 ierr = MatDestroy(&A);CHKERRQ(ierr); 4364 } else { 4365 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4366 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4367 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4368 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4369 } 4370 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4371 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4372 if (!pcbddc->symmetric_primal) { 4373 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4374 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4375 } 4376 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4377 4378 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4379 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4380 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4381 if (!pcbddc->symmetric_primal) { 4382 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4383 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4384 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4385 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4386 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4387 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4388 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4389 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4390 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4391 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4392 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4393 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4394 } else { 4395 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4396 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4397 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4398 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4399 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4400 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4401 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4402 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4403 } 4404 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4405 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4406 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4407 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4408 if (pcbddc->benign_n) { 4409 Mat B0_B,B0_BPHI; 4410 PetscScalar *data,*data2; 4411 PetscInt j; 4412 4413 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4414 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4415 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4416 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4417 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4418 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4419 for (j=0;j<pcbddc->benign_n;j++) { 4420 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4421 for (i=0;i<pcbddc->local_primal_size;i++) { 4422 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4423 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4424 } 4425 } 4426 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4427 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4428 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4429 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4430 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4431 } 4432 #if 0 4433 { 4434 PetscViewer viewer; 4435 char filename[256]; 4436 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4437 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4438 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4439 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4440 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4441 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4442 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4443 if (pcbddc->coarse_phi_B) { 4444 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4445 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4446 } 4447 if (pcbddc->coarse_phi_D) { 4448 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4449 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4450 } 4451 if (pcbddc->coarse_psi_B) { 4452 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4453 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4454 } 4455 if (pcbddc->coarse_psi_D) { 4456 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4457 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4458 } 4459 ierr = PetscObjectSetName((PetscObject)pcbddc->local_mat,"A");CHKERRQ(ierr); 4460 ierr = MatView(pcbddc->local_mat,viewer);CHKERRQ(ierr); 4461 ierr = PetscObjectSetName((PetscObject)pcbddc->ConstraintMatrix,"C");CHKERRQ(ierr); 4462 ierr = MatView(pcbddc->ConstraintMatrix,viewer);CHKERRQ(ierr); 4463 ierr = PetscObjectSetName((PetscObject)pcis->is_I_local,"I");CHKERRQ(ierr); 4464 ierr = ISView(pcis->is_I_local,viewer);CHKERRQ(ierr); 4465 ierr = PetscObjectSetName((PetscObject)pcis->is_B_local,"B");CHKERRQ(ierr); 4466 ierr = ISView(pcis->is_B_local,viewer);CHKERRQ(ierr); 4467 ierr = PetscObjectSetName((PetscObject)pcbddc->is_R_local,"R");CHKERRQ(ierr); 4468 ierr = ISView(pcbddc->is_R_local,viewer);CHKERRQ(ierr); 4469 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4470 } 4471 #endif 4472 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4473 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4474 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4475 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4476 4477 /* check constraints */ 4478 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4479 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4480 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4481 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4482 } else { 4483 PetscScalar *data; 4484 Mat tmat; 4485 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4486 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4487 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4488 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4489 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4490 } 4491 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4492 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4493 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4494 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4495 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4496 if (!pcbddc->symmetric_primal) { 4497 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4498 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4499 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4500 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4501 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4502 } 4503 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4504 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4505 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4506 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4507 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4508 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4509 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4510 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4511 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4512 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4513 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4514 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4515 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4516 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4517 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4518 if (!pcbddc->symmetric_primal) { 4519 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4520 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4521 } 4522 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4523 } 4524 /* get back data */ 4525 *coarse_submat_vals_n = coarse_submat_vals; 4526 PetscFunctionReturn(0); 4527 } 4528 4529 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4530 { 4531 Mat *work_mat; 4532 IS isrow_s,iscol_s; 4533 PetscBool rsorted,csorted; 4534 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4535 PetscErrorCode ierr; 4536 4537 PetscFunctionBegin; 4538 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4539 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4540 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4541 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4542 4543 if (!rsorted) { 4544 const PetscInt *idxs; 4545 PetscInt *idxs_sorted,i; 4546 4547 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4548 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4549 for (i=0;i<rsize;i++) { 4550 idxs_perm_r[i] = i; 4551 } 4552 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4553 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4554 for (i=0;i<rsize;i++) { 4555 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4556 } 4557 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4558 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4559 } else { 4560 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4561 isrow_s = isrow; 4562 } 4563 4564 if (!csorted) { 4565 if (isrow == iscol) { 4566 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4567 iscol_s = isrow_s; 4568 } else { 4569 const PetscInt *idxs; 4570 PetscInt *idxs_sorted,i; 4571 4572 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4573 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4574 for (i=0;i<csize;i++) { 4575 idxs_perm_c[i] = i; 4576 } 4577 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4578 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4579 for (i=0;i<csize;i++) { 4580 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4581 } 4582 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4583 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4584 } 4585 } else { 4586 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4587 iscol_s = iscol; 4588 } 4589 4590 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4591 4592 if (!rsorted || !csorted) { 4593 Mat new_mat; 4594 IS is_perm_r,is_perm_c; 4595 4596 if (!rsorted) { 4597 PetscInt *idxs_r,i; 4598 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4599 for (i=0;i<rsize;i++) { 4600 idxs_r[idxs_perm_r[i]] = i; 4601 } 4602 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4603 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4604 } else { 4605 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4606 } 4607 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4608 4609 if (!csorted) { 4610 if (isrow_s == iscol_s) { 4611 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4612 is_perm_c = is_perm_r; 4613 } else { 4614 PetscInt *idxs_c,i; 4615 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4616 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4617 for (i=0;i<csize;i++) { 4618 idxs_c[idxs_perm_c[i]] = i; 4619 } 4620 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4621 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4622 } 4623 } else { 4624 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4625 } 4626 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4627 4628 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4629 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4630 work_mat[0] = new_mat; 4631 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4632 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4633 } 4634 4635 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4636 *B = work_mat[0]; 4637 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4638 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4639 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4640 PetscFunctionReturn(0); 4641 } 4642 4643 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4644 { 4645 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4646 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4647 Mat new_mat,lA; 4648 IS is_local,is_global; 4649 PetscInt local_size; 4650 PetscBool isseqaij; 4651 PetscErrorCode ierr; 4652 4653 PetscFunctionBegin; 4654 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4655 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4656 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4657 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4658 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4659 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4660 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4661 4662 /* check */ 4663 if (pcbddc->dbg_flag) { 4664 Vec x,x_change; 4665 PetscReal error; 4666 4667 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4668 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4669 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4670 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4671 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4672 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4673 if (!pcbddc->change_interior) { 4674 const PetscScalar *x,*y,*v; 4675 PetscReal lerror = 0.; 4676 PetscInt i; 4677 4678 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4679 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4680 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4681 for (i=0;i<local_size;i++) 4682 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4683 lerror = PetscAbsScalar(x[i]-y[i]); 4684 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4685 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4686 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4687 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4688 if (error > PETSC_SMALL) { 4689 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4690 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4691 } else { 4692 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4693 } 4694 } 4695 } 4696 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4697 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4698 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4699 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4700 if (error > PETSC_SMALL) { 4701 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4702 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4703 } else { 4704 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4705 } 4706 } 4707 ierr = VecDestroy(&x);CHKERRQ(ierr); 4708 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4709 } 4710 4711 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4712 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4713 4714 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4715 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4716 if (isseqaij) { 4717 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4718 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4719 if (lA) { 4720 Mat work; 4721 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4722 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4723 ierr = MatDestroy(&work);CHKERRQ(ierr); 4724 } 4725 } else { 4726 Mat work_mat; 4727 4728 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4729 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4730 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4731 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4732 if (lA) { 4733 Mat work; 4734 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4735 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4736 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4737 ierr = MatDestroy(&work);CHKERRQ(ierr); 4738 } 4739 } 4740 if (matis->A->symmetric_set) { 4741 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4742 #if !defined(PETSC_USE_COMPLEX) 4743 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4744 #endif 4745 } 4746 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4747 PetscFunctionReturn(0); 4748 } 4749 4750 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4751 { 4752 PC_IS* pcis = (PC_IS*)(pc->data); 4753 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4754 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4755 PetscInt *idx_R_local=NULL; 4756 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4757 PetscInt vbs,bs; 4758 PetscBT bitmask=NULL; 4759 PetscErrorCode ierr; 4760 4761 PetscFunctionBegin; 4762 /* 4763 No need to setup local scatters if 4764 - primal space is unchanged 4765 AND 4766 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4767 AND 4768 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4769 */ 4770 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4771 PetscFunctionReturn(0); 4772 } 4773 /* destroy old objects */ 4774 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4775 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4776 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4777 /* Set Non-overlapping dimensions */ 4778 n_B = pcis->n_B; 4779 n_D = pcis->n - n_B; 4780 n_vertices = pcbddc->n_vertices; 4781 4782 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4783 4784 /* create auxiliary bitmask and allocate workspace */ 4785 if (!sub_schurs || !sub_schurs->reuse_solver) { 4786 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4787 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4788 for (i=0;i<n_vertices;i++) { 4789 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4790 } 4791 4792 for (i=0, n_R=0; i<pcis->n; i++) { 4793 if (!PetscBTLookup(bitmask,i)) { 4794 idx_R_local[n_R++] = i; 4795 } 4796 } 4797 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4798 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4799 4800 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4801 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4802 } 4803 4804 /* Block code */ 4805 vbs = 1; 4806 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4807 if (bs>1 && !(n_vertices%bs)) { 4808 PetscBool is_blocked = PETSC_TRUE; 4809 PetscInt *vary; 4810 if (!sub_schurs || !sub_schurs->reuse_solver) { 4811 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4812 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4813 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4814 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4815 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4816 for (i=0; i<pcis->n/bs; i++) { 4817 if (vary[i]!=0 && vary[i]!=bs) { 4818 is_blocked = PETSC_FALSE; 4819 break; 4820 } 4821 } 4822 ierr = PetscFree(vary);CHKERRQ(ierr); 4823 } else { 4824 /* Verify directly the R set */ 4825 for (i=0; i<n_R/bs; i++) { 4826 PetscInt j,node=idx_R_local[bs*i]; 4827 for (j=1; j<bs; j++) { 4828 if (node != idx_R_local[bs*i+j]-j) { 4829 is_blocked = PETSC_FALSE; 4830 break; 4831 } 4832 } 4833 } 4834 } 4835 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4836 vbs = bs; 4837 for (i=0;i<n_R/vbs;i++) { 4838 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4839 } 4840 } 4841 } 4842 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4843 if (sub_schurs && sub_schurs->reuse_solver) { 4844 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4845 4846 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4847 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4848 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4849 reuse_solver->is_R = pcbddc->is_R_local; 4850 } else { 4851 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4852 } 4853 4854 /* print some info if requested */ 4855 if (pcbddc->dbg_flag) { 4856 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4857 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4858 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4859 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4860 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4861 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4862 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4863 } 4864 4865 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4866 if (!sub_schurs || !sub_schurs->reuse_solver) { 4867 IS is_aux1,is_aux2; 4868 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4869 4870 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4871 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4872 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4873 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4874 for (i=0; i<n_D; i++) { 4875 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4876 } 4877 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4878 for (i=0, j=0; i<n_R; i++) { 4879 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4880 aux_array1[j++] = i; 4881 } 4882 } 4883 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4884 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4885 for (i=0, j=0; i<n_B; i++) { 4886 if (!PetscBTLookup(bitmask,is_indices[i])) { 4887 aux_array2[j++] = i; 4888 } 4889 } 4890 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4891 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4892 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4893 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4894 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4895 4896 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4897 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4898 for (i=0, j=0; i<n_R; i++) { 4899 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4900 aux_array1[j++] = i; 4901 } 4902 } 4903 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4904 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4905 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4906 } 4907 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4908 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4909 } else { 4910 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4911 IS tis; 4912 PetscInt schur_size; 4913 4914 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4915 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4916 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4917 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4918 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4919 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4920 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4921 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4922 } 4923 } 4924 PetscFunctionReturn(0); 4925 } 4926 4927 4928 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4929 { 4930 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4931 PC_IS *pcis = (PC_IS*)pc->data; 4932 PC pc_temp; 4933 Mat A_RR; 4934 MatReuse reuse; 4935 PetscScalar m_one = -1.0; 4936 PetscReal value; 4937 PetscInt n_D,n_R; 4938 PetscBool check_corr,issbaij; 4939 PetscErrorCode ierr; 4940 /* prefixes stuff */ 4941 char dir_prefix[256],neu_prefix[256],str_level[16]; 4942 size_t len; 4943 4944 PetscFunctionBegin; 4945 4946 /* compute prefixes */ 4947 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4948 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4949 if (!pcbddc->current_level) { 4950 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4951 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4952 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4953 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4954 } else { 4955 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4956 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4957 len -= 15; /* remove "pc_bddc_coarse_" */ 4958 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4959 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4960 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4961 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4962 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4963 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4964 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4965 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4966 } 4967 4968 /* DIRICHLET PROBLEM */ 4969 if (dirichlet) { 4970 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4971 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4972 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4973 if (pcbddc->dbg_flag) { 4974 Mat A_IIn; 4975 4976 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4977 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4978 pcis->A_II = A_IIn; 4979 } 4980 } 4981 if (pcbddc->local_mat->symmetric_set) { 4982 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 4983 } 4984 /* Matrix for Dirichlet problem is pcis->A_II */ 4985 n_D = pcis->n - pcis->n_B; 4986 if (!pcbddc->ksp_D) { /* create object if not yet build */ 4987 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 4988 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 4989 /* default */ 4990 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 4991 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 4992 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 4993 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 4994 if (issbaij) { 4995 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 4996 } else { 4997 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 4998 } 4999 /* Allow user's customization */ 5000 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5001 } 5002 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5003 if (sub_schurs && sub_schurs->reuse_solver) { 5004 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5005 5006 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5007 } 5008 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5009 if (!n_D) { 5010 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5011 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5012 } 5013 /* Set Up KSP for Dirichlet problem of BDDC */ 5014 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5015 /* set ksp_D into pcis data */ 5016 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5017 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5018 pcis->ksp_D = pcbddc->ksp_D; 5019 } 5020 5021 /* NEUMANN PROBLEM */ 5022 A_RR = 0; 5023 if (neumann) { 5024 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5025 PetscInt ibs,mbs; 5026 PetscBool issbaij, reuse_neumann_solver; 5027 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5028 5029 reuse_neumann_solver = PETSC_FALSE; 5030 if (sub_schurs && sub_schurs->reuse_solver) { 5031 IS iP; 5032 5033 reuse_neumann_solver = PETSC_TRUE; 5034 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5035 if (iP) reuse_neumann_solver = PETSC_FALSE; 5036 } 5037 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5038 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5039 if (pcbddc->ksp_R) { /* already created ksp */ 5040 PetscInt nn_R; 5041 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5042 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5043 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5044 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5045 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5046 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5047 reuse = MAT_INITIAL_MATRIX; 5048 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5049 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5050 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5051 reuse = MAT_INITIAL_MATRIX; 5052 } else { /* safe to reuse the matrix */ 5053 reuse = MAT_REUSE_MATRIX; 5054 } 5055 } 5056 /* last check */ 5057 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5058 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5059 reuse = MAT_INITIAL_MATRIX; 5060 } 5061 } else { /* first time, so we need to create the matrix */ 5062 reuse = MAT_INITIAL_MATRIX; 5063 } 5064 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5065 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5066 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5067 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5068 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5069 if (matis->A == pcbddc->local_mat) { 5070 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5071 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5072 } else { 5073 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5074 } 5075 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5076 if (matis->A == pcbddc->local_mat) { 5077 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5078 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5079 } else { 5080 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5081 } 5082 } 5083 /* extract A_RR */ 5084 if (reuse_neumann_solver) { 5085 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5086 5087 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5088 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5089 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5090 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5091 } else { 5092 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5093 } 5094 } else { 5095 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5096 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5097 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5098 } 5099 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5100 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5101 } 5102 if (pcbddc->local_mat->symmetric_set) { 5103 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5104 } 5105 if (!pcbddc->ksp_R) { /* create object if not present */ 5106 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5107 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5108 /* default */ 5109 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5110 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5111 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5112 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5113 if (issbaij) { 5114 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5115 } else { 5116 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5117 } 5118 /* Allow user's customization */ 5119 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5120 } 5121 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5122 if (!n_R) { 5123 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5124 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5125 } 5126 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5127 /* Reuse solver if it is present */ 5128 if (reuse_neumann_solver) { 5129 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5130 5131 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5132 } 5133 /* Set Up KSP for Neumann problem of BDDC */ 5134 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5135 } 5136 5137 if (pcbddc->dbg_flag) { 5138 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5139 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5140 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5141 } 5142 5143 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5144 check_corr = PETSC_FALSE; 5145 if (pcbddc->NullSpace_corr[0]) { 5146 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5147 } 5148 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5149 check_corr = PETSC_TRUE; 5150 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5151 } 5152 if (neumann && pcbddc->NullSpace_corr[2]) { 5153 check_corr = PETSC_TRUE; 5154 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5155 } 5156 /* check Dirichlet and Neumann solvers */ 5157 if (pcbddc->dbg_flag) { 5158 if (dirichlet) { /* Dirichlet */ 5159 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5160 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5161 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5162 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5163 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5164 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5165 if (check_corr) { 5166 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5167 } 5168 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5169 } 5170 if (neumann) { /* Neumann */ 5171 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5172 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5173 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5174 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5175 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5176 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5177 if (check_corr) { 5178 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5179 } 5180 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5181 } 5182 } 5183 /* free Neumann problem's matrix */ 5184 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5185 PetscFunctionReturn(0); 5186 } 5187 5188 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5189 { 5190 PetscErrorCode ierr; 5191 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5192 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5193 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5194 5195 PetscFunctionBegin; 5196 if (!reuse_solver) { 5197 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5198 } 5199 if (!pcbddc->switch_static) { 5200 if (applytranspose && pcbddc->local_auxmat1) { 5201 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5202 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5203 } 5204 if (!reuse_solver) { 5205 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5206 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5207 } else { 5208 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5209 5210 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5211 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5212 } 5213 } else { 5214 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5215 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5216 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5217 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5218 if (applytranspose && pcbddc->local_auxmat1) { 5219 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5220 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5221 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5222 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5223 } 5224 } 5225 if (!reuse_solver || pcbddc->switch_static) { 5226 if (applytranspose) { 5227 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5228 } else { 5229 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5230 } 5231 } else { 5232 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5233 5234 if (applytranspose) { 5235 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5236 } else { 5237 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5238 } 5239 } 5240 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5241 if (!pcbddc->switch_static) { 5242 if (!reuse_solver) { 5243 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5244 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5245 } else { 5246 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5247 5248 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5249 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5250 } 5251 if (!applytranspose && pcbddc->local_auxmat1) { 5252 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5253 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5254 } 5255 } else { 5256 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5257 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5258 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5259 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5260 if (!applytranspose && pcbddc->local_auxmat1) { 5261 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5262 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5263 } 5264 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5265 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5266 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5267 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5268 } 5269 PetscFunctionReturn(0); 5270 } 5271 5272 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5273 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5274 { 5275 PetscErrorCode ierr; 5276 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5277 PC_IS* pcis = (PC_IS*) (pc->data); 5278 const PetscScalar zero = 0.0; 5279 5280 PetscFunctionBegin; 5281 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5282 if (!pcbddc->benign_apply_coarse_only) { 5283 if (applytranspose) { 5284 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5285 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5286 } else { 5287 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5288 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5289 } 5290 } else { 5291 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5292 } 5293 5294 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5295 if (pcbddc->benign_n) { 5296 PetscScalar *array; 5297 PetscInt j; 5298 5299 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5300 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5301 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5302 } 5303 5304 /* start communications from local primal nodes to rhs of coarse solver */ 5305 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5306 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5307 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5308 5309 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5310 if (pcbddc->coarse_ksp) { 5311 Mat coarse_mat; 5312 Vec rhs,sol; 5313 MatNullSpace nullsp; 5314 PetscBool isbddc = PETSC_FALSE; 5315 5316 if (pcbddc->benign_have_null) { 5317 PC coarse_pc; 5318 5319 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5320 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5321 /* we need to propagate to coarser levels the need for a possible benign correction */ 5322 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5323 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5324 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5325 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5326 } 5327 } 5328 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5329 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5330 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5331 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5332 if (nullsp) { 5333 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5334 } 5335 if (applytranspose) { 5336 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5337 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5338 } else { 5339 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5340 PC coarse_pc; 5341 5342 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5343 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5344 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5345 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5346 } else { 5347 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5348 } 5349 } 5350 /* we don't need the benign correction at coarser levels anymore */ 5351 if (pcbddc->benign_have_null && isbddc) { 5352 PC coarse_pc; 5353 PC_BDDC* coarsepcbddc; 5354 5355 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5356 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5357 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5358 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5359 } 5360 if (nullsp) { 5361 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5362 } 5363 } 5364 5365 /* Local solution on R nodes */ 5366 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5367 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5368 } 5369 /* communications from coarse sol to local primal nodes */ 5370 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5371 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5372 5373 /* Sum contributions from the two levels */ 5374 if (!pcbddc->benign_apply_coarse_only) { 5375 if (applytranspose) { 5376 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5377 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5378 } else { 5379 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5380 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5381 } 5382 /* store p0 */ 5383 if (pcbddc->benign_n) { 5384 PetscScalar *array; 5385 PetscInt j; 5386 5387 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5388 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5389 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5390 } 5391 } else { /* expand the coarse solution */ 5392 if (applytranspose) { 5393 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5394 } else { 5395 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5396 } 5397 } 5398 PetscFunctionReturn(0); 5399 } 5400 5401 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5402 { 5403 PetscErrorCode ierr; 5404 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5405 PetscScalar *array; 5406 Vec from,to; 5407 5408 PetscFunctionBegin; 5409 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5410 from = pcbddc->coarse_vec; 5411 to = pcbddc->vec1_P; 5412 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5413 Vec tvec; 5414 5415 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5416 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5417 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5418 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5419 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5420 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5421 } 5422 } else { /* from local to global -> put data in coarse right hand side */ 5423 from = pcbddc->vec1_P; 5424 to = pcbddc->coarse_vec; 5425 } 5426 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5427 PetscFunctionReturn(0); 5428 } 5429 5430 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5431 { 5432 PetscErrorCode ierr; 5433 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5434 PetscScalar *array; 5435 Vec from,to; 5436 5437 PetscFunctionBegin; 5438 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5439 from = pcbddc->coarse_vec; 5440 to = pcbddc->vec1_P; 5441 } else { /* from local to global -> put data in coarse right hand side */ 5442 from = pcbddc->vec1_P; 5443 to = pcbddc->coarse_vec; 5444 } 5445 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5446 if (smode == SCATTER_FORWARD) { 5447 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5448 Vec tvec; 5449 5450 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5451 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5452 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5453 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5454 } 5455 } else { 5456 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5457 ierr = VecResetArray(from);CHKERRQ(ierr); 5458 } 5459 } 5460 PetscFunctionReturn(0); 5461 } 5462 5463 /* uncomment for testing purposes */ 5464 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5465 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5466 { 5467 PetscErrorCode ierr; 5468 PC_IS* pcis = (PC_IS*)(pc->data); 5469 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5470 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5471 /* one and zero */ 5472 PetscScalar one=1.0,zero=0.0; 5473 /* space to store constraints and their local indices */ 5474 PetscScalar *constraints_data; 5475 PetscInt *constraints_idxs,*constraints_idxs_B; 5476 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5477 PetscInt *constraints_n; 5478 /* iterators */ 5479 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5480 /* BLAS integers */ 5481 PetscBLASInt lwork,lierr; 5482 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5483 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5484 /* reuse */ 5485 PetscInt olocal_primal_size,olocal_primal_size_cc; 5486 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5487 /* change of basis */ 5488 PetscBool qr_needed; 5489 PetscBT change_basis,qr_needed_idx; 5490 /* auxiliary stuff */ 5491 PetscInt *nnz,*is_indices; 5492 PetscInt ncc; 5493 /* some quantities */ 5494 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5495 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5496 PetscReal tol; /* tolerance for retaining eigenmodes */ 5497 5498 PetscFunctionBegin; 5499 tol = PetscSqrtReal(PETSC_SMALL); 5500 /* Destroy Mat objects computed previously */ 5501 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5502 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5503 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5504 /* save info on constraints from previous setup (if any) */ 5505 olocal_primal_size = pcbddc->local_primal_size; 5506 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5507 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5508 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5509 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5510 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5511 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5512 5513 if (!pcbddc->adaptive_selection) { 5514 IS ISForVertices,*ISForFaces,*ISForEdges; 5515 MatNullSpace nearnullsp; 5516 const Vec *nearnullvecs; 5517 Vec *localnearnullsp; 5518 PetscScalar *array; 5519 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5520 PetscBool nnsp_has_cnst; 5521 /* LAPACK working arrays for SVD or POD */ 5522 PetscBool skip_lapack,boolforchange; 5523 PetscScalar *work; 5524 PetscReal *singular_vals; 5525 #if defined(PETSC_USE_COMPLEX) 5526 PetscReal *rwork; 5527 #endif 5528 #if defined(PETSC_MISSING_LAPACK_GESVD) 5529 PetscScalar *temp_basis,*correlation_mat; 5530 #else 5531 PetscBLASInt dummy_int=1; 5532 PetscScalar dummy_scalar=1.; 5533 #endif 5534 5535 /* Get index sets for faces, edges and vertices from graph */ 5536 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5537 /* print some info */ 5538 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5539 PetscInt nv; 5540 5541 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5542 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5543 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5546 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5547 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5548 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5549 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5550 } 5551 5552 /* free unneeded index sets */ 5553 if (!pcbddc->use_vertices) { 5554 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5555 } 5556 if (!pcbddc->use_edges) { 5557 for (i=0;i<n_ISForEdges;i++) { 5558 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5559 } 5560 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5561 n_ISForEdges = 0; 5562 } 5563 if (!pcbddc->use_faces) { 5564 for (i=0;i<n_ISForFaces;i++) { 5565 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5566 } 5567 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5568 n_ISForFaces = 0; 5569 } 5570 5571 /* check if near null space is attached to global mat */ 5572 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5573 if (nearnullsp) { 5574 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5575 /* remove any stored info */ 5576 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5577 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5578 /* store information for BDDC solver reuse */ 5579 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5580 pcbddc->onearnullspace = nearnullsp; 5581 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5582 for (i=0;i<nnsp_size;i++) { 5583 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5584 } 5585 } else { /* if near null space is not provided BDDC uses constants by default */ 5586 nnsp_size = 0; 5587 nnsp_has_cnst = PETSC_TRUE; 5588 } 5589 /* get max number of constraints on a single cc */ 5590 max_constraints = nnsp_size; 5591 if (nnsp_has_cnst) max_constraints++; 5592 5593 /* 5594 Evaluate maximum storage size needed by the procedure 5595 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5596 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5597 There can be multiple constraints per connected component 5598 */ 5599 n_vertices = 0; 5600 if (ISForVertices) { 5601 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5602 } 5603 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5604 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5605 5606 total_counts = n_ISForFaces+n_ISForEdges; 5607 total_counts *= max_constraints; 5608 total_counts += n_vertices; 5609 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5610 5611 total_counts = 0; 5612 max_size_of_constraint = 0; 5613 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5614 IS used_is; 5615 if (i<n_ISForEdges) { 5616 used_is = ISForEdges[i]; 5617 } else { 5618 used_is = ISForFaces[i-n_ISForEdges]; 5619 } 5620 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5621 total_counts += j; 5622 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5623 } 5624 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5625 5626 /* get local part of global near null space vectors */ 5627 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5628 for (k=0;k<nnsp_size;k++) { 5629 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5630 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5631 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5632 } 5633 5634 /* whether or not to skip lapack calls */ 5635 skip_lapack = PETSC_TRUE; 5636 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5637 5638 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5639 if (!skip_lapack) { 5640 PetscScalar temp_work; 5641 5642 #if defined(PETSC_MISSING_LAPACK_GESVD) 5643 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5644 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5645 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5646 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5647 #if defined(PETSC_USE_COMPLEX) 5648 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5649 #endif 5650 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5651 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5652 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5653 lwork = -1; 5654 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5655 #if !defined(PETSC_USE_COMPLEX) 5656 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5657 #else 5658 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5659 #endif 5660 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5661 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5662 #else /* on missing GESVD */ 5663 /* SVD */ 5664 PetscInt max_n,min_n; 5665 max_n = max_size_of_constraint; 5666 min_n = max_constraints; 5667 if (max_size_of_constraint < max_constraints) { 5668 min_n = max_size_of_constraint; 5669 max_n = max_constraints; 5670 } 5671 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5672 #if defined(PETSC_USE_COMPLEX) 5673 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5674 #endif 5675 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5676 lwork = -1; 5677 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5678 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5679 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5680 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5681 #if !defined(PETSC_USE_COMPLEX) 5682 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5683 #else 5684 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5685 #endif 5686 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5687 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5688 #endif /* on missing GESVD */ 5689 /* Allocate optimal workspace */ 5690 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5691 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5692 } 5693 /* Now we can loop on constraining sets */ 5694 total_counts = 0; 5695 constraints_idxs_ptr[0] = 0; 5696 constraints_data_ptr[0] = 0; 5697 /* vertices */ 5698 if (n_vertices) { 5699 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5700 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5701 for (i=0;i<n_vertices;i++) { 5702 constraints_n[total_counts] = 1; 5703 constraints_data[total_counts] = 1.0; 5704 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5705 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5706 total_counts++; 5707 } 5708 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5709 n_vertices = total_counts; 5710 } 5711 5712 /* edges and faces */ 5713 total_counts_cc = total_counts; 5714 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5715 IS used_is; 5716 PetscBool idxs_copied = PETSC_FALSE; 5717 5718 if (ncc<n_ISForEdges) { 5719 used_is = ISForEdges[ncc]; 5720 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5721 } else { 5722 used_is = ISForFaces[ncc-n_ISForEdges]; 5723 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5724 } 5725 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5726 5727 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5728 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5729 /* change of basis should not be performed on local periodic nodes */ 5730 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5731 if (nnsp_has_cnst) { 5732 PetscScalar quad_value; 5733 5734 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5735 idxs_copied = PETSC_TRUE; 5736 5737 if (!pcbddc->use_nnsp_true) { 5738 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5739 } else { 5740 quad_value = 1.0; 5741 } 5742 for (j=0;j<size_of_constraint;j++) { 5743 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5744 } 5745 temp_constraints++; 5746 total_counts++; 5747 } 5748 for (k=0;k<nnsp_size;k++) { 5749 PetscReal real_value; 5750 PetscScalar *ptr_to_data; 5751 5752 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5753 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5754 for (j=0;j<size_of_constraint;j++) { 5755 ptr_to_data[j] = array[is_indices[j]]; 5756 } 5757 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5758 /* check if array is null on the connected component */ 5759 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5760 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5761 if (real_value > tol*size_of_constraint) { /* keep indices and values */ 5762 temp_constraints++; 5763 total_counts++; 5764 if (!idxs_copied) { 5765 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5766 idxs_copied = PETSC_TRUE; 5767 } 5768 } 5769 } 5770 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5771 valid_constraints = temp_constraints; 5772 if (!pcbddc->use_nnsp_true && temp_constraints) { 5773 if (temp_constraints == 1) { /* just normalize the constraint */ 5774 PetscScalar norm,*ptr_to_data; 5775 5776 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5777 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5778 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5779 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5780 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5781 } else { /* perform SVD */ 5782 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5783 5784 #if defined(PETSC_MISSING_LAPACK_GESVD) 5785 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5786 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5787 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5788 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5789 from that computed using LAPACKgesvd 5790 -> This is due to a different computation of eigenvectors in LAPACKheev 5791 -> The quality of the POD-computed basis will be the same */ 5792 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5793 /* Store upper triangular part of correlation matrix */ 5794 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5795 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5796 for (j=0;j<temp_constraints;j++) { 5797 for (k=0;k<j+1;k++) { 5798 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5799 } 5800 } 5801 /* compute eigenvalues and eigenvectors of correlation matrix */ 5802 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5803 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5804 #if !defined(PETSC_USE_COMPLEX) 5805 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5806 #else 5807 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5808 #endif 5809 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5810 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5811 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5812 j = 0; 5813 while (j < temp_constraints && singular_vals[j] < tol) j++; 5814 total_counts = total_counts-j; 5815 valid_constraints = temp_constraints-j; 5816 /* scale and copy POD basis into used quadrature memory */ 5817 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5818 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5819 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5820 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5821 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5822 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5823 if (j<temp_constraints) { 5824 PetscInt ii; 5825 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5826 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5827 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5828 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5829 for (k=0;k<temp_constraints-j;k++) { 5830 for (ii=0;ii<size_of_constraint;ii++) { 5831 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5832 } 5833 } 5834 } 5835 #else /* on missing GESVD */ 5836 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5837 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5838 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5839 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5840 #if !defined(PETSC_USE_COMPLEX) 5841 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5842 #else 5843 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5844 #endif 5845 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5846 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5847 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5848 k = temp_constraints; 5849 if (k > size_of_constraint) k = size_of_constraint; 5850 j = 0; 5851 while (j < k && singular_vals[k-j-1] < tol) j++; 5852 valid_constraints = k-j; 5853 total_counts = total_counts-temp_constraints+valid_constraints; 5854 #endif /* on missing GESVD */ 5855 } 5856 } 5857 /* update pointers information */ 5858 if (valid_constraints) { 5859 constraints_n[total_counts_cc] = valid_constraints; 5860 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5861 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5862 /* set change_of_basis flag */ 5863 if (boolforchange) { 5864 PetscBTSet(change_basis,total_counts_cc); 5865 } 5866 total_counts_cc++; 5867 } 5868 } 5869 /* free workspace */ 5870 if (!skip_lapack) { 5871 ierr = PetscFree(work);CHKERRQ(ierr); 5872 #if defined(PETSC_USE_COMPLEX) 5873 ierr = PetscFree(rwork);CHKERRQ(ierr); 5874 #endif 5875 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5876 #if defined(PETSC_MISSING_LAPACK_GESVD) 5877 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5878 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5879 #endif 5880 } 5881 for (k=0;k<nnsp_size;k++) { 5882 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5883 } 5884 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5885 /* free index sets of faces, edges and vertices */ 5886 for (i=0;i<n_ISForFaces;i++) { 5887 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5888 } 5889 if (n_ISForFaces) { 5890 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5891 } 5892 for (i=0;i<n_ISForEdges;i++) { 5893 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5894 } 5895 if (n_ISForEdges) { 5896 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5897 } 5898 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5899 } else { 5900 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5901 5902 total_counts = 0; 5903 n_vertices = 0; 5904 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5905 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5906 } 5907 max_constraints = 0; 5908 total_counts_cc = 0; 5909 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5910 total_counts += pcbddc->adaptive_constraints_n[i]; 5911 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5912 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5913 } 5914 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5915 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5916 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5917 constraints_data = pcbddc->adaptive_constraints_data; 5918 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5919 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5920 total_counts_cc = 0; 5921 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5922 if (pcbddc->adaptive_constraints_n[i]) { 5923 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5924 } 5925 } 5926 #if 0 5927 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5928 for (i=0;i<total_counts_cc;i++) { 5929 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5930 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5931 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5932 printf(" %d",constraints_idxs[j]); 5933 } 5934 printf("\n"); 5935 printf("number of cc: %d\n",constraints_n[i]); 5936 } 5937 for (i=0;i<n_vertices;i++) { 5938 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5939 } 5940 for (i=0;i<sub_schurs->n_subs;i++) { 5941 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5942 } 5943 #endif 5944 5945 max_size_of_constraint = 0; 5946 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5947 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5948 /* Change of basis */ 5949 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5950 if (pcbddc->use_change_of_basis) { 5951 for (i=0;i<sub_schurs->n_subs;i++) { 5952 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5953 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5954 } 5955 } 5956 } 5957 } 5958 pcbddc->local_primal_size = total_counts; 5959 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5960 5961 /* map constraints_idxs in boundary numbering */ 5962 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5963 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5964 5965 /* Create constraint matrix */ 5966 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5967 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 5968 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 5969 5970 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 5971 /* determine if a QR strategy is needed for change of basis */ 5972 qr_needed = PETSC_FALSE; 5973 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 5974 total_primal_vertices=0; 5975 pcbddc->local_primal_size_cc = 0; 5976 for (i=0;i<total_counts_cc;i++) { 5977 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 5978 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 5979 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 5980 pcbddc->local_primal_size_cc += 1; 5981 } else if (PetscBTLookup(change_basis,i)) { 5982 for (k=0;k<constraints_n[i];k++) { 5983 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 5984 } 5985 pcbddc->local_primal_size_cc += constraints_n[i]; 5986 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 5987 PetscBTSet(qr_needed_idx,i); 5988 qr_needed = PETSC_TRUE; 5989 } 5990 } else { 5991 pcbddc->local_primal_size_cc += 1; 5992 } 5993 } 5994 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 5995 pcbddc->n_vertices = total_primal_vertices; 5996 /* permute indices in order to have a sorted set of vertices */ 5997 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5998 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5999 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6000 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6001 6002 /* nonzero structure of constraint matrix */ 6003 /* and get reference dof for local constraints */ 6004 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6005 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6006 6007 j = total_primal_vertices; 6008 total_counts = total_primal_vertices; 6009 cum = total_primal_vertices; 6010 for (i=n_vertices;i<total_counts_cc;i++) { 6011 if (!PetscBTLookup(change_basis,i)) { 6012 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6013 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6014 cum++; 6015 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6016 for (k=0;k<constraints_n[i];k++) { 6017 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6018 nnz[j+k] = size_of_constraint; 6019 } 6020 j += constraints_n[i]; 6021 } 6022 } 6023 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6024 ierr = PetscFree(nnz);CHKERRQ(ierr); 6025 6026 /* set values in constraint matrix */ 6027 for (i=0;i<total_primal_vertices;i++) { 6028 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6029 } 6030 total_counts = total_primal_vertices; 6031 for (i=n_vertices;i<total_counts_cc;i++) { 6032 if (!PetscBTLookup(change_basis,i)) { 6033 PetscInt *cols; 6034 6035 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6036 cols = constraints_idxs+constraints_idxs_ptr[i]; 6037 for (k=0;k<constraints_n[i];k++) { 6038 PetscInt row = total_counts+k; 6039 PetscScalar *vals; 6040 6041 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6042 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6043 } 6044 total_counts += constraints_n[i]; 6045 } 6046 } 6047 /* assembling */ 6048 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6049 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6050 6051 /* 6052 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6053 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6054 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6055 */ 6056 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6057 if (pcbddc->use_change_of_basis) { 6058 /* dual and primal dofs on a single cc */ 6059 PetscInt dual_dofs,primal_dofs; 6060 /* working stuff for GEQRF */ 6061 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6062 PetscBLASInt lqr_work; 6063 /* working stuff for UNGQR */ 6064 PetscScalar *gqr_work,lgqr_work_t; 6065 PetscBLASInt lgqr_work; 6066 /* working stuff for TRTRS */ 6067 PetscScalar *trs_rhs; 6068 PetscBLASInt Blas_NRHS; 6069 /* pointers for values insertion into change of basis matrix */ 6070 PetscInt *start_rows,*start_cols; 6071 PetscScalar *start_vals; 6072 /* working stuff for values insertion */ 6073 PetscBT is_primal; 6074 PetscInt *aux_primal_numbering_B; 6075 /* matrix sizes */ 6076 PetscInt global_size,local_size; 6077 /* temporary change of basis */ 6078 Mat localChangeOfBasisMatrix; 6079 /* extra space for debugging */ 6080 PetscScalar *dbg_work; 6081 6082 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6083 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6084 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6085 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6086 /* nonzeros for local mat */ 6087 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6088 if (!pcbddc->benign_change || pcbddc->fake_change) { 6089 for (i=0;i<pcis->n;i++) nnz[i]=1; 6090 } else { 6091 const PetscInt *ii; 6092 PetscInt n; 6093 PetscBool flg_row; 6094 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6095 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6096 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6097 } 6098 for (i=n_vertices;i<total_counts_cc;i++) { 6099 if (PetscBTLookup(change_basis,i)) { 6100 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6101 if (PetscBTLookup(qr_needed_idx,i)) { 6102 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6103 } else { 6104 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6105 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6106 } 6107 } 6108 } 6109 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6110 ierr = PetscFree(nnz);CHKERRQ(ierr); 6111 /* Set interior change in the matrix */ 6112 if (!pcbddc->benign_change || pcbddc->fake_change) { 6113 for (i=0;i<pcis->n;i++) { 6114 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6115 } 6116 } else { 6117 const PetscInt *ii,*jj; 6118 PetscScalar *aa; 6119 PetscInt n; 6120 PetscBool flg_row; 6121 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6122 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6123 for (i=0;i<n;i++) { 6124 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6125 } 6126 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6127 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6128 } 6129 6130 if (pcbddc->dbg_flag) { 6131 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6132 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6133 } 6134 6135 6136 /* Now we loop on the constraints which need a change of basis */ 6137 /* 6138 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6139 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6140 6141 Basic blocks of change of basis matrix T computed by 6142 6143 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6144 6145 | 1 0 ... 0 s_1/S | 6146 | 0 1 ... 0 s_2/S | 6147 | ... | 6148 | 0 ... 1 s_{n-1}/S | 6149 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6150 6151 with S = \sum_{i=1}^n s_i^2 6152 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6153 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6154 6155 - QR decomposition of constraints otherwise 6156 */ 6157 if (qr_needed) { 6158 /* space to store Q */ 6159 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6160 /* array to store scaling factors for reflectors */ 6161 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6162 /* first we issue queries for optimal work */ 6163 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6164 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6165 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6166 lqr_work = -1; 6167 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6168 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6169 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6170 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6171 lgqr_work = -1; 6172 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6173 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6174 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6175 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6176 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6177 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6178 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6179 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6180 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6181 /* array to store rhs and solution of triangular solver */ 6182 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6183 /* allocating workspace for check */ 6184 if (pcbddc->dbg_flag) { 6185 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6186 } 6187 } 6188 /* array to store whether a node is primal or not */ 6189 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6190 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6191 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6192 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6193 for (i=0;i<total_primal_vertices;i++) { 6194 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6195 } 6196 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6197 6198 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6199 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6200 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6201 if (PetscBTLookup(change_basis,total_counts)) { 6202 /* get constraint info */ 6203 primal_dofs = constraints_n[total_counts]; 6204 dual_dofs = size_of_constraint-primal_dofs; 6205 6206 if (pcbddc->dbg_flag) { 6207 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6208 } 6209 6210 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6211 6212 /* copy quadrature constraints for change of basis check */ 6213 if (pcbddc->dbg_flag) { 6214 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6215 } 6216 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6217 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6218 6219 /* compute QR decomposition of constraints */ 6220 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6221 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6222 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6223 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6224 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6225 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6226 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6227 6228 /* explictly compute R^-T */ 6229 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6230 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6231 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6232 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6233 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6234 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6235 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6236 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6237 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6239 6240 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6241 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6242 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6243 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6244 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6245 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6246 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6247 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6248 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6249 6250 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6251 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6252 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6253 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6254 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6255 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6256 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6257 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6258 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6259 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6260 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6262 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6263 6264 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6265 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6266 /* insert cols for primal dofs */ 6267 for (j=0;j<primal_dofs;j++) { 6268 start_vals = &qr_basis[j*size_of_constraint]; 6269 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6270 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6271 } 6272 /* insert cols for dual dofs */ 6273 for (j=0,k=0;j<dual_dofs;k++) { 6274 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6275 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6276 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6277 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6278 j++; 6279 } 6280 } 6281 6282 /* check change of basis */ 6283 if (pcbddc->dbg_flag) { 6284 PetscInt ii,jj; 6285 PetscBool valid_qr=PETSC_TRUE; 6286 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6287 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6289 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6290 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6291 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6292 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6293 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6294 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6295 for (jj=0;jj<size_of_constraint;jj++) { 6296 for (ii=0;ii<primal_dofs;ii++) { 6297 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6298 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6299 } 6300 } 6301 if (!valid_qr) { 6302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6303 for (jj=0;jj<size_of_constraint;jj++) { 6304 for (ii=0;ii<primal_dofs;ii++) { 6305 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6306 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6307 } 6308 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6309 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6310 } 6311 } 6312 } 6313 } else { 6314 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6315 } 6316 } 6317 } else { /* simple transformation block */ 6318 PetscInt row,col; 6319 PetscScalar val,norm; 6320 6321 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6322 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6323 for (j=0;j<size_of_constraint;j++) { 6324 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6325 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6326 if (!PetscBTLookup(is_primal,row_B)) { 6327 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6328 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6329 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6330 } else { 6331 for (k=0;k<size_of_constraint;k++) { 6332 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6333 if (row != col) { 6334 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6335 } else { 6336 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6337 } 6338 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6339 } 6340 } 6341 } 6342 if (pcbddc->dbg_flag) { 6343 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6344 } 6345 } 6346 } else { 6347 if (pcbddc->dbg_flag) { 6348 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6349 } 6350 } 6351 } 6352 6353 /* free workspace */ 6354 if (qr_needed) { 6355 if (pcbddc->dbg_flag) { 6356 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6357 } 6358 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6359 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6360 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6361 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6362 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6363 } 6364 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6365 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6366 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6367 6368 /* assembling of global change of variable */ 6369 if (!pcbddc->fake_change) { 6370 Mat tmat; 6371 PetscInt bs; 6372 6373 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6374 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6375 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6376 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6377 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6378 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6379 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6380 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6381 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6382 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6383 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6384 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6385 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6386 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6387 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6388 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6389 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6390 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6391 6392 /* check */ 6393 if (pcbddc->dbg_flag) { 6394 PetscReal error; 6395 Vec x,x_change; 6396 6397 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6398 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6399 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6400 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6401 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6402 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6403 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6404 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6405 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6406 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6407 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6408 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6409 if (error > PETSC_SMALL) { 6410 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6411 } 6412 ierr = VecDestroy(&x);CHKERRQ(ierr); 6413 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6414 } 6415 /* adapt sub_schurs computed (if any) */ 6416 if (pcbddc->use_deluxe_scaling) { 6417 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6418 6419 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6420 if (sub_schurs && sub_schurs->S_Ej_all) { 6421 Mat S_new,tmat; 6422 IS is_all_N,is_V_Sall = NULL; 6423 6424 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6425 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6426 if (pcbddc->deluxe_zerorows) { 6427 ISLocalToGlobalMapping NtoSall; 6428 IS is_V; 6429 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6430 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6431 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6432 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6433 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6434 } 6435 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6436 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6437 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6438 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6439 if (pcbddc->deluxe_zerorows) { 6440 const PetscScalar *array; 6441 const PetscInt *idxs_V,*idxs_all; 6442 PetscInt i,n_V; 6443 6444 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6445 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6446 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6447 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6448 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6449 for (i=0;i<n_V;i++) { 6450 PetscScalar val; 6451 PetscInt idx; 6452 6453 idx = idxs_V[i]; 6454 val = array[idxs_all[idxs_V[i]]]; 6455 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6456 } 6457 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6458 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6459 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6460 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6461 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6462 } 6463 sub_schurs->S_Ej_all = S_new; 6464 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6465 if (sub_schurs->sum_S_Ej_all) { 6466 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6467 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6468 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6469 if (pcbddc->deluxe_zerorows) { 6470 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6471 } 6472 sub_schurs->sum_S_Ej_all = S_new; 6473 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6474 } 6475 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6476 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6477 } 6478 /* destroy any change of basis context in sub_schurs */ 6479 if (sub_schurs && sub_schurs->change) { 6480 PetscInt i; 6481 6482 for (i=0;i<sub_schurs->n_subs;i++) { 6483 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6484 } 6485 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6486 } 6487 } 6488 if (pcbddc->switch_static) { /* need to save the local change */ 6489 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6490 } else { 6491 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6492 } 6493 /* determine if any process has changed the pressures locally */ 6494 pcbddc->change_interior = pcbddc->benign_have_null; 6495 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6496 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6497 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6498 pcbddc->use_qr_single = qr_needed; 6499 } 6500 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6501 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6502 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6503 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6504 } else { 6505 Mat benign_global = NULL; 6506 if (pcbddc->benign_have_null) { 6507 Mat tmat; 6508 6509 pcbddc->change_interior = PETSC_TRUE; 6510 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6511 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6512 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6513 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6514 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6515 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6516 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6517 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6518 if (pcbddc->benign_change) { 6519 Mat M; 6520 6521 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6522 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6523 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6524 ierr = MatDestroy(&M);CHKERRQ(ierr); 6525 } else { 6526 Mat eye; 6527 PetscScalar *array; 6528 6529 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6530 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6531 for (i=0;i<pcis->n;i++) { 6532 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6533 } 6534 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6535 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6536 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6537 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6538 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6539 } 6540 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6541 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6542 } 6543 if (pcbddc->user_ChangeOfBasisMatrix) { 6544 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6545 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6546 } else if (pcbddc->benign_have_null) { 6547 pcbddc->ChangeOfBasisMatrix = benign_global; 6548 } 6549 } 6550 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6551 IS is_global; 6552 const PetscInt *gidxs; 6553 6554 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6555 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6556 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6557 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6558 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6559 } 6560 } 6561 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6562 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6563 } 6564 6565 if (!pcbddc->fake_change) { 6566 /* add pressure dofs to set of primal nodes for numbering purposes */ 6567 for (i=0;i<pcbddc->benign_n;i++) { 6568 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6569 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6570 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6571 pcbddc->local_primal_size_cc++; 6572 pcbddc->local_primal_size++; 6573 } 6574 6575 /* check if a new primal space has been introduced (also take into account benign trick) */ 6576 pcbddc->new_primal_space_local = PETSC_TRUE; 6577 if (olocal_primal_size == pcbddc->local_primal_size) { 6578 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6579 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6580 if (!pcbddc->new_primal_space_local) { 6581 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6582 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6583 } 6584 } 6585 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6586 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6587 } 6588 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6589 6590 /* flush dbg viewer */ 6591 if (pcbddc->dbg_flag) { 6592 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6593 } 6594 6595 /* free workspace */ 6596 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6597 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6598 if (!pcbddc->adaptive_selection) { 6599 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6600 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6601 } else { 6602 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6603 pcbddc->adaptive_constraints_idxs_ptr, 6604 pcbddc->adaptive_constraints_data_ptr, 6605 pcbddc->adaptive_constraints_idxs, 6606 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6607 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6608 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6609 } 6610 PetscFunctionReturn(0); 6611 } 6612 /* #undef PETSC_MISSING_LAPACK_GESVD */ 6613 6614 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6615 { 6616 ISLocalToGlobalMapping map; 6617 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6618 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6619 PetscInt i,N; 6620 PetscBool rcsr = PETSC_FALSE; 6621 PetscErrorCode ierr; 6622 6623 PetscFunctionBegin; 6624 if (pcbddc->recompute_topography) { 6625 pcbddc->graphanalyzed = PETSC_FALSE; 6626 /* Reset previously computed graph */ 6627 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6628 /* Init local Graph struct */ 6629 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6630 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6631 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6632 6633 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6634 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6635 } 6636 /* Check validity of the csr graph passed in by the user */ 6637 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6638 6639 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6640 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6641 PetscInt *xadj,*adjncy; 6642 PetscInt nvtxs; 6643 PetscBool flg_row=PETSC_FALSE; 6644 6645 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6646 if (flg_row) { 6647 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6648 pcbddc->computed_rowadj = PETSC_TRUE; 6649 } 6650 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6651 rcsr = PETSC_TRUE; 6652 } 6653 if (pcbddc->dbg_flag) { 6654 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6655 } 6656 6657 /* Setup of Graph */ 6658 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6659 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6660 6661 /* attach info on disconnected subdomains if present */ 6662 if (pcbddc->n_local_subs) { 6663 PetscInt *local_subs; 6664 6665 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6666 for (i=0;i<pcbddc->n_local_subs;i++) { 6667 const PetscInt *idxs; 6668 PetscInt nl,j; 6669 6670 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6671 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6672 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6673 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6674 } 6675 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6676 pcbddc->mat_graph->local_subs = local_subs; 6677 } 6678 } 6679 6680 if (!pcbddc->graphanalyzed) { 6681 /* Graph's connected components analysis */ 6682 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6683 pcbddc->graphanalyzed = PETSC_TRUE; 6684 } 6685 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6686 PetscFunctionReturn(0); 6687 } 6688 6689 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6690 { 6691 PetscInt i,j; 6692 PetscScalar *alphas; 6693 PetscErrorCode ierr; 6694 6695 PetscFunctionBegin; 6696 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6697 for (i=0;i<n;i++) { 6698 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6699 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6700 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6701 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6702 } 6703 ierr = PetscFree(alphas);CHKERRQ(ierr); 6704 PetscFunctionReturn(0); 6705 } 6706 6707 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6708 { 6709 Mat A; 6710 PetscInt n_neighs,*neighs,*n_shared,**shared; 6711 PetscMPIInt size,rank,color; 6712 PetscInt *xadj,*adjncy; 6713 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6714 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6715 PetscInt void_procs,*procs_candidates = NULL; 6716 PetscInt xadj_count,*count; 6717 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6718 PetscSubcomm psubcomm; 6719 MPI_Comm subcomm; 6720 PetscErrorCode ierr; 6721 6722 PetscFunctionBegin; 6723 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6724 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6725 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6726 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6727 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6728 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6729 6730 if (have_void) *have_void = PETSC_FALSE; 6731 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6732 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6733 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6734 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6735 im_active = !!n; 6736 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6737 void_procs = size - active_procs; 6738 /* get ranks of of non-active processes in mat communicator */ 6739 if (void_procs) { 6740 PetscInt ncand; 6741 6742 if (have_void) *have_void = PETSC_TRUE; 6743 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6744 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6745 for (i=0,ncand=0;i<size;i++) { 6746 if (!procs_candidates[i]) { 6747 procs_candidates[ncand++] = i; 6748 } 6749 } 6750 /* force n_subdomains to be not greater that the number of non-active processes */ 6751 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6752 } 6753 6754 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6755 number of subdomains requested 1 -> send to master or first candidate in voids */ 6756 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6757 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6758 PetscInt issize,isidx,dest; 6759 if (*n_subdomains == 1) dest = 0; 6760 else dest = rank; 6761 if (im_active) { 6762 issize = 1; 6763 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6764 isidx = procs_candidates[dest]; 6765 } else { 6766 isidx = dest; 6767 } 6768 } else { 6769 issize = 0; 6770 isidx = -1; 6771 } 6772 if (*n_subdomains != 1) *n_subdomains = active_procs; 6773 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6774 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6775 PetscFunctionReturn(0); 6776 } 6777 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6778 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6779 threshold = PetscMax(threshold,2); 6780 6781 /* Get info on mapping */ 6782 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6783 6784 /* build local CSR graph of subdomains' connectivity */ 6785 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6786 xadj[0] = 0; 6787 xadj[1] = PetscMax(n_neighs-1,0); 6788 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6789 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6790 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6791 for (i=1;i<n_neighs;i++) 6792 for (j=0;j<n_shared[i];j++) 6793 count[shared[i][j]] += 1; 6794 6795 xadj_count = 0; 6796 for (i=1;i<n_neighs;i++) { 6797 for (j=0;j<n_shared[i];j++) { 6798 if (count[shared[i][j]] < threshold) { 6799 adjncy[xadj_count] = neighs[i]; 6800 adjncy_wgt[xadj_count] = n_shared[i]; 6801 xadj_count++; 6802 break; 6803 } 6804 } 6805 } 6806 xadj[1] = xadj_count; 6807 ierr = PetscFree(count);CHKERRQ(ierr); 6808 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6809 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6810 6811 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6812 6813 /* Restrict work on active processes only */ 6814 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6815 if (void_procs) { 6816 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6817 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6818 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6819 subcomm = PetscSubcommChild(psubcomm); 6820 } else { 6821 psubcomm = NULL; 6822 subcomm = PetscObjectComm((PetscObject)mat); 6823 } 6824 6825 v_wgt = NULL; 6826 if (!color) { 6827 ierr = PetscFree(xadj);CHKERRQ(ierr); 6828 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6829 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6830 } else { 6831 Mat subdomain_adj; 6832 IS new_ranks,new_ranks_contig; 6833 MatPartitioning partitioner; 6834 PetscInt rstart=0,rend=0; 6835 PetscInt *is_indices,*oldranks; 6836 PetscMPIInt size; 6837 PetscBool aggregate; 6838 6839 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6840 if (void_procs) { 6841 PetscInt prank = rank; 6842 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6843 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6844 for (i=0;i<xadj[1];i++) { 6845 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6846 } 6847 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6848 } else { 6849 oldranks = NULL; 6850 } 6851 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6852 if (aggregate) { /* TODO: all this part could be made more efficient */ 6853 PetscInt lrows,row,ncols,*cols; 6854 PetscMPIInt nrank; 6855 PetscScalar *vals; 6856 6857 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6858 lrows = 0; 6859 if (nrank<redprocs) { 6860 lrows = size/redprocs; 6861 if (nrank<size%redprocs) lrows++; 6862 } 6863 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6864 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6865 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6866 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6867 row = nrank; 6868 ncols = xadj[1]-xadj[0]; 6869 cols = adjncy; 6870 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6871 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6872 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6873 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6874 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6875 ierr = PetscFree(xadj);CHKERRQ(ierr); 6876 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6877 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6878 ierr = PetscFree(vals);CHKERRQ(ierr); 6879 if (use_vwgt) { 6880 Vec v; 6881 const PetscScalar *array; 6882 PetscInt nl; 6883 6884 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6885 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6886 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6887 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6888 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6889 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6890 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6891 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6892 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6893 ierr = VecDestroy(&v);CHKERRQ(ierr); 6894 } 6895 } else { 6896 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6897 if (use_vwgt) { 6898 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6899 v_wgt[0] = n; 6900 } 6901 } 6902 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6903 6904 /* Partition */ 6905 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6906 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6907 if (v_wgt) { 6908 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6909 } 6910 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6911 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6912 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6913 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6914 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6915 6916 /* renumber new_ranks to avoid "holes" in new set of processors */ 6917 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6918 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6919 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6920 if (!aggregate) { 6921 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6922 #if defined(PETSC_USE_DEBUG) 6923 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6924 #endif 6925 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6926 } else if (oldranks) { 6927 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6928 } else { 6929 ranks_send_to_idx[0] = is_indices[0]; 6930 } 6931 } else { 6932 PetscInt idx = 0; 6933 PetscMPIInt tag; 6934 MPI_Request *reqs; 6935 6936 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6937 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6938 for (i=rstart;i<rend;i++) { 6939 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6940 } 6941 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6942 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6943 ierr = PetscFree(reqs);CHKERRQ(ierr); 6944 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6945 #if defined(PETSC_USE_DEBUG) 6946 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6947 #endif 6948 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6949 } else if (oldranks) { 6950 ranks_send_to_idx[0] = oldranks[idx]; 6951 } else { 6952 ranks_send_to_idx[0] = idx; 6953 } 6954 } 6955 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6956 /* clean up */ 6957 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6958 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6959 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6960 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6961 } 6962 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6963 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6964 6965 /* assemble parallel IS for sends */ 6966 i = 1; 6967 if (!color) i=0; 6968 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 6969 PetscFunctionReturn(0); 6970 } 6971 6972 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 6973 6974 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 6975 { 6976 Mat local_mat; 6977 IS is_sends_internal; 6978 PetscInt rows,cols,new_local_rows; 6979 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 6980 PetscBool ismatis,isdense,newisdense,destroy_mat; 6981 ISLocalToGlobalMapping l2gmap; 6982 PetscInt* l2gmap_indices; 6983 const PetscInt* is_indices; 6984 MatType new_local_type; 6985 /* buffers */ 6986 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 6987 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 6988 PetscInt *recv_buffer_idxs_local; 6989 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 6990 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 6991 /* MPI */ 6992 MPI_Comm comm,comm_n; 6993 PetscSubcomm subcomm; 6994 PetscMPIInt n_sends,n_recvs,commsize; 6995 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 6996 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 6997 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 6998 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 6999 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7000 PetscErrorCode ierr; 7001 7002 PetscFunctionBegin; 7003 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7004 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7005 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7006 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7007 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7008 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7009 PetscValidLogicalCollectiveBool(mat,reuse,6); 7010 PetscValidLogicalCollectiveInt(mat,nis,8); 7011 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7012 if (nvecs) { 7013 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7014 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7015 } 7016 /* further checks */ 7017 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7018 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7019 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7020 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7021 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7022 if (reuse && *mat_n) { 7023 PetscInt mrows,mcols,mnrows,mncols; 7024 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7025 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7026 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7027 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7028 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7029 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7030 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7031 } 7032 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7033 PetscValidLogicalCollectiveInt(mat,bs,0); 7034 7035 /* prepare IS for sending if not provided */ 7036 if (!is_sends) { 7037 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7038 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7039 } else { 7040 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7041 is_sends_internal = is_sends; 7042 } 7043 7044 /* get comm */ 7045 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7046 7047 /* compute number of sends */ 7048 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7049 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7050 7051 /* compute number of receives */ 7052 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7053 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7054 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7055 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7056 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7057 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7058 ierr = PetscFree(iflags);CHKERRQ(ierr); 7059 7060 /* restrict comm if requested */ 7061 subcomm = 0; 7062 destroy_mat = PETSC_FALSE; 7063 if (restrict_comm) { 7064 PetscMPIInt color,subcommsize; 7065 7066 color = 0; 7067 if (restrict_full) { 7068 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7069 } else { 7070 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7071 } 7072 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7073 subcommsize = commsize - subcommsize; 7074 /* check if reuse has been requested */ 7075 if (reuse) { 7076 if (*mat_n) { 7077 PetscMPIInt subcommsize2; 7078 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7079 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7080 comm_n = PetscObjectComm((PetscObject)*mat_n); 7081 } else { 7082 comm_n = PETSC_COMM_SELF; 7083 } 7084 } else { /* MAT_INITIAL_MATRIX */ 7085 PetscMPIInt rank; 7086 7087 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7088 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7089 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7090 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7091 comm_n = PetscSubcommChild(subcomm); 7092 } 7093 /* flag to destroy *mat_n if not significative */ 7094 if (color) destroy_mat = PETSC_TRUE; 7095 } else { 7096 comm_n = comm; 7097 } 7098 7099 /* prepare send/receive buffers */ 7100 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7101 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7102 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7103 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7104 if (nis) { 7105 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7106 } 7107 7108 /* Get data from local matrices */ 7109 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7110 /* TODO: See below some guidelines on how to prepare the local buffers */ 7111 /* 7112 send_buffer_vals should contain the raw values of the local matrix 7113 send_buffer_idxs should contain: 7114 - MatType_PRIVATE type 7115 - PetscInt size_of_l2gmap 7116 - PetscInt global_row_indices[size_of_l2gmap] 7117 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7118 */ 7119 else { 7120 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7121 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7122 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7123 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7124 send_buffer_idxs[1] = i; 7125 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7126 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7127 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7128 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7129 for (i=0;i<n_sends;i++) { 7130 ilengths_vals[is_indices[i]] = len*len; 7131 ilengths_idxs[is_indices[i]] = len+2; 7132 } 7133 } 7134 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7135 /* additional is (if any) */ 7136 if (nis) { 7137 PetscMPIInt psum; 7138 PetscInt j; 7139 for (j=0,psum=0;j<nis;j++) { 7140 PetscInt plen; 7141 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7142 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7143 psum += len+1; /* indices + lenght */ 7144 } 7145 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7146 for (j=0,psum=0;j<nis;j++) { 7147 PetscInt plen; 7148 const PetscInt *is_array_idxs; 7149 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7150 send_buffer_idxs_is[psum] = plen; 7151 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7152 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7153 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7154 psum += plen+1; /* indices + lenght */ 7155 } 7156 for (i=0;i<n_sends;i++) { 7157 ilengths_idxs_is[is_indices[i]] = psum; 7158 } 7159 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7160 } 7161 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7162 7163 buf_size_idxs = 0; 7164 buf_size_vals = 0; 7165 buf_size_idxs_is = 0; 7166 buf_size_vecs = 0; 7167 for (i=0;i<n_recvs;i++) { 7168 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7169 buf_size_vals += (PetscInt)olengths_vals[i]; 7170 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7171 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7172 } 7173 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7174 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7175 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7176 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7177 7178 /* get new tags for clean communications */ 7179 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7180 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7181 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7182 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7183 7184 /* allocate for requests */ 7185 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7186 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7187 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7188 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7189 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7190 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7191 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7192 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7193 7194 /* communications */ 7195 ptr_idxs = recv_buffer_idxs; 7196 ptr_vals = recv_buffer_vals; 7197 ptr_idxs_is = recv_buffer_idxs_is; 7198 ptr_vecs = recv_buffer_vecs; 7199 for (i=0;i<n_recvs;i++) { 7200 source_dest = onodes[i]; 7201 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7202 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7203 ptr_idxs += olengths_idxs[i]; 7204 ptr_vals += olengths_vals[i]; 7205 if (nis) { 7206 source_dest = onodes_is[i]; 7207 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7208 ptr_idxs_is += olengths_idxs_is[i]; 7209 } 7210 if (nvecs) { 7211 source_dest = onodes[i]; 7212 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7213 ptr_vecs += olengths_idxs[i]-2; 7214 } 7215 } 7216 for (i=0;i<n_sends;i++) { 7217 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7218 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7219 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7220 if (nis) { 7221 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7222 } 7223 if (nvecs) { 7224 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7225 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7226 } 7227 } 7228 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7229 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7230 7231 /* assemble new l2g map */ 7232 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7233 ptr_idxs = recv_buffer_idxs; 7234 new_local_rows = 0; 7235 for (i=0;i<n_recvs;i++) { 7236 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7237 ptr_idxs += olengths_idxs[i]; 7238 } 7239 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7240 ptr_idxs = recv_buffer_idxs; 7241 new_local_rows = 0; 7242 for (i=0;i<n_recvs;i++) { 7243 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7244 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7245 ptr_idxs += olengths_idxs[i]; 7246 } 7247 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7248 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7249 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7250 7251 /* infer new local matrix type from received local matrices type */ 7252 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7253 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7254 if (n_recvs) { 7255 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7256 ptr_idxs = recv_buffer_idxs; 7257 for (i=0;i<n_recvs;i++) { 7258 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7259 new_local_type_private = MATAIJ_PRIVATE; 7260 break; 7261 } 7262 ptr_idxs += olengths_idxs[i]; 7263 } 7264 switch (new_local_type_private) { 7265 case MATDENSE_PRIVATE: 7266 new_local_type = MATSEQAIJ; 7267 bs = 1; 7268 break; 7269 case MATAIJ_PRIVATE: 7270 new_local_type = MATSEQAIJ; 7271 bs = 1; 7272 break; 7273 case MATBAIJ_PRIVATE: 7274 new_local_type = MATSEQBAIJ; 7275 break; 7276 case MATSBAIJ_PRIVATE: 7277 new_local_type = MATSEQSBAIJ; 7278 break; 7279 default: 7280 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7281 break; 7282 } 7283 } else { /* by default, new_local_type is seqaij */ 7284 new_local_type = MATSEQAIJ; 7285 bs = 1; 7286 } 7287 7288 /* create MATIS object if needed */ 7289 if (!reuse) { 7290 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7291 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7292 } else { 7293 /* it also destroys the local matrices */ 7294 if (*mat_n) { 7295 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7296 } else { /* this is a fake object */ 7297 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7298 } 7299 } 7300 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7301 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7302 7303 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7304 7305 /* Global to local map of received indices */ 7306 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7307 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7308 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7309 7310 /* restore attributes -> type of incoming data and its size */ 7311 buf_size_idxs = 0; 7312 for (i=0;i<n_recvs;i++) { 7313 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7314 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7315 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7316 } 7317 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7318 7319 /* set preallocation */ 7320 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7321 if (!newisdense) { 7322 PetscInt *new_local_nnz=0; 7323 7324 ptr_idxs = recv_buffer_idxs_local; 7325 if (n_recvs) { 7326 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7327 } 7328 for (i=0;i<n_recvs;i++) { 7329 PetscInt j; 7330 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7331 for (j=0;j<*(ptr_idxs+1);j++) { 7332 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7333 } 7334 } else { 7335 /* TODO */ 7336 } 7337 ptr_idxs += olengths_idxs[i]; 7338 } 7339 if (new_local_nnz) { 7340 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7341 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7342 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7343 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7344 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7345 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7346 } else { 7347 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7348 } 7349 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7350 } else { 7351 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7352 } 7353 7354 /* set values */ 7355 ptr_vals = recv_buffer_vals; 7356 ptr_idxs = recv_buffer_idxs_local; 7357 for (i=0;i<n_recvs;i++) { 7358 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7359 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7360 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7361 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7362 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7363 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7364 } else { 7365 /* TODO */ 7366 } 7367 ptr_idxs += olengths_idxs[i]; 7368 ptr_vals += olengths_vals[i]; 7369 } 7370 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7371 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7372 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7373 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7374 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7375 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7376 7377 #if 0 7378 if (!restrict_comm) { /* check */ 7379 Vec lvec,rvec; 7380 PetscReal infty_error; 7381 7382 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7383 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7384 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7385 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7386 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7387 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7388 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7389 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7390 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7391 } 7392 #endif 7393 7394 /* assemble new additional is (if any) */ 7395 if (nis) { 7396 PetscInt **temp_idxs,*count_is,j,psum; 7397 7398 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7399 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7400 ptr_idxs = recv_buffer_idxs_is; 7401 psum = 0; 7402 for (i=0;i<n_recvs;i++) { 7403 for (j=0;j<nis;j++) { 7404 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7405 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7406 psum += plen; 7407 ptr_idxs += plen+1; /* shift pointer to received data */ 7408 } 7409 } 7410 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7411 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7412 for (i=1;i<nis;i++) { 7413 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7414 } 7415 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7416 ptr_idxs = recv_buffer_idxs_is; 7417 for (i=0;i<n_recvs;i++) { 7418 for (j=0;j<nis;j++) { 7419 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7420 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7421 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7422 ptr_idxs += plen+1; /* shift pointer to received data */ 7423 } 7424 } 7425 for (i=0;i<nis;i++) { 7426 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7427 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7428 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7429 } 7430 ierr = PetscFree(count_is);CHKERRQ(ierr); 7431 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7432 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7433 } 7434 /* free workspace */ 7435 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7436 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7437 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7438 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7439 if (isdense) { 7440 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7441 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7442 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7443 } else { 7444 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7445 } 7446 if (nis) { 7447 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7448 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7449 } 7450 7451 if (nvecs) { 7452 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7453 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7454 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7455 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7456 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7457 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7458 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7459 /* set values */ 7460 ptr_vals = recv_buffer_vecs; 7461 ptr_idxs = recv_buffer_idxs_local; 7462 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7463 for (i=0;i<n_recvs;i++) { 7464 PetscInt j; 7465 for (j=0;j<*(ptr_idxs+1);j++) { 7466 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7467 } 7468 ptr_idxs += olengths_idxs[i]; 7469 ptr_vals += olengths_idxs[i]-2; 7470 } 7471 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7472 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7473 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7474 } 7475 7476 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7477 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7478 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7479 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7480 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7481 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7482 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7483 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7484 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7485 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7486 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7487 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7488 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7489 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7490 ierr = PetscFree(onodes);CHKERRQ(ierr); 7491 if (nis) { 7492 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7493 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7494 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7495 } 7496 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7497 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7498 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7499 for (i=0;i<nis;i++) { 7500 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7501 } 7502 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7503 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7504 } 7505 *mat_n = NULL; 7506 } 7507 PetscFunctionReturn(0); 7508 } 7509 7510 /* temporary hack into ksp private data structure */ 7511 #include <petsc/private/kspimpl.h> 7512 7513 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7514 { 7515 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7516 PC_IS *pcis = (PC_IS*)pc->data; 7517 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7518 Mat coarsedivudotp = NULL; 7519 Mat coarseG,t_coarse_mat_is; 7520 MatNullSpace CoarseNullSpace = NULL; 7521 ISLocalToGlobalMapping coarse_islg; 7522 IS coarse_is,*isarray; 7523 PetscInt i,im_active=-1,active_procs=-1; 7524 PetscInt nis,nisdofs,nisneu,nisvert; 7525 PC pc_temp; 7526 PCType coarse_pc_type; 7527 KSPType coarse_ksp_type; 7528 PetscBool multilevel_requested,multilevel_allowed; 7529 PetscBool coarse_reuse; 7530 PetscInt ncoarse,nedcfield; 7531 PetscBool compute_vecs = PETSC_FALSE; 7532 PetscScalar *array; 7533 MatReuse coarse_mat_reuse; 7534 PetscBool restr, full_restr, have_void; 7535 PetscMPIInt commsize; 7536 PetscErrorCode ierr; 7537 7538 PetscFunctionBegin; 7539 /* Assign global numbering to coarse dofs */ 7540 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7541 PetscInt ocoarse_size; 7542 compute_vecs = PETSC_TRUE; 7543 7544 pcbddc->new_primal_space = PETSC_TRUE; 7545 ocoarse_size = pcbddc->coarse_size; 7546 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7547 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7548 /* see if we can avoid some work */ 7549 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7550 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7551 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7552 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7553 coarse_reuse = PETSC_FALSE; 7554 } else { /* we can safely reuse already computed coarse matrix */ 7555 coarse_reuse = PETSC_TRUE; 7556 } 7557 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7558 coarse_reuse = PETSC_FALSE; 7559 } 7560 /* reset any subassembling information */ 7561 if (!coarse_reuse || pcbddc->recompute_topography) { 7562 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7563 } 7564 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7565 coarse_reuse = PETSC_TRUE; 7566 } 7567 /* assemble coarse matrix */ 7568 if (coarse_reuse && pcbddc->coarse_ksp) { 7569 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7570 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7571 coarse_mat_reuse = MAT_REUSE_MATRIX; 7572 } else { 7573 coarse_mat = NULL; 7574 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7575 } 7576 7577 /* creates temporary l2gmap and IS for coarse indexes */ 7578 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7579 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7580 7581 /* creates temporary MATIS object for coarse matrix */ 7582 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7583 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7584 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7585 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7586 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7587 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7588 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7589 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7590 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7591 7592 /* count "active" (i.e. with positive local size) and "void" processes */ 7593 im_active = !!(pcis->n); 7594 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7595 7596 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7597 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7598 /* full_restr : just use the receivers from the subassembling pattern */ 7599 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7600 coarse_mat_is = NULL; 7601 multilevel_allowed = PETSC_FALSE; 7602 multilevel_requested = PETSC_FALSE; 7603 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7604 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7605 if (multilevel_requested) { 7606 ncoarse = active_procs/pcbddc->coarsening_ratio; 7607 restr = PETSC_FALSE; 7608 full_restr = PETSC_FALSE; 7609 } else { 7610 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7611 restr = PETSC_TRUE; 7612 full_restr = PETSC_TRUE; 7613 } 7614 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7615 ncoarse = PetscMax(1,ncoarse); 7616 if (!pcbddc->coarse_subassembling) { 7617 if (pcbddc->coarsening_ratio > 1) { 7618 if (multilevel_requested) { 7619 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7620 } else { 7621 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7622 } 7623 } else { 7624 PetscMPIInt rank; 7625 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7626 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7627 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7628 } 7629 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7630 PetscInt psum; 7631 if (pcbddc->coarse_ksp) psum = 1; 7632 else psum = 0; 7633 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7634 if (ncoarse < commsize) have_void = PETSC_TRUE; 7635 } 7636 /* determine if we can go multilevel */ 7637 if (multilevel_requested) { 7638 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7639 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7640 } 7641 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7642 7643 /* dump subassembling pattern */ 7644 if (pcbddc->dbg_flag && multilevel_allowed) { 7645 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7646 } 7647 7648 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7649 nedcfield = -1; 7650 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7651 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7652 const PetscInt *idxs; 7653 ISLocalToGlobalMapping tmap; 7654 7655 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7656 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7657 /* allocate space for temporary storage */ 7658 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7659 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7660 /* allocate for IS array */ 7661 nisdofs = pcbddc->n_ISForDofsLocal; 7662 if (pcbddc->nedclocal) { 7663 if (pcbddc->nedfield > -1) { 7664 nedcfield = pcbddc->nedfield; 7665 } else { 7666 nedcfield = 0; 7667 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7668 nisdofs = 1; 7669 } 7670 } 7671 nisneu = !!pcbddc->NeumannBoundariesLocal; 7672 nisvert = 0; /* nisvert is not used */ 7673 nis = nisdofs + nisneu + nisvert; 7674 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7675 /* dofs splitting */ 7676 for (i=0;i<nisdofs;i++) { 7677 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7678 if (nedcfield != i) { 7679 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7680 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7681 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7682 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7683 } else { 7684 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7685 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7686 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7687 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7688 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7689 } 7690 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7691 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7692 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7693 } 7694 /* neumann boundaries */ 7695 if (pcbddc->NeumannBoundariesLocal) { 7696 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7697 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7698 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7699 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7700 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7701 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7702 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7703 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7704 } 7705 /* free memory */ 7706 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7707 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7708 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7709 } else { 7710 nis = 0; 7711 nisdofs = 0; 7712 nisneu = 0; 7713 nisvert = 0; 7714 isarray = NULL; 7715 } 7716 /* destroy no longer needed map */ 7717 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7718 7719 /* subassemble */ 7720 if (multilevel_allowed) { 7721 Vec vp[1]; 7722 PetscInt nvecs = 0; 7723 PetscBool reuse,reuser; 7724 7725 if (coarse_mat) reuse = PETSC_TRUE; 7726 else reuse = PETSC_FALSE; 7727 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7728 vp[0] = NULL; 7729 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7730 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7731 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7732 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7733 nvecs = 1; 7734 7735 if (pcbddc->divudotp) { 7736 Mat B,loc_divudotp; 7737 Vec v,p; 7738 IS dummy; 7739 PetscInt np; 7740 7741 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7742 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7743 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7744 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7745 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7746 ierr = VecSet(p,1.);CHKERRQ(ierr); 7747 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7748 ierr = VecDestroy(&p);CHKERRQ(ierr); 7749 ierr = MatDestroy(&B);CHKERRQ(ierr); 7750 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7751 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7752 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7753 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7754 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7755 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7756 ierr = VecDestroy(&v);CHKERRQ(ierr); 7757 } 7758 } 7759 if (reuser) { 7760 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7761 } else { 7762 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7763 } 7764 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7765 PetscScalar *arraym,*arrayv; 7766 PetscInt nl; 7767 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7768 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7769 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7770 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7771 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7772 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7773 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7774 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7775 } else { 7776 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7777 } 7778 } else { 7779 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7780 } 7781 if (coarse_mat_is || coarse_mat) { 7782 PetscMPIInt size; 7783 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7784 if (!multilevel_allowed) { 7785 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7786 } else { 7787 Mat A; 7788 7789 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7790 if (coarse_mat_is) { 7791 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7792 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7793 coarse_mat = coarse_mat_is; 7794 } 7795 /* be sure we don't have MatSeqDENSE as local mat */ 7796 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7797 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7798 } 7799 } 7800 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7801 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7802 7803 /* create local to global scatters for coarse problem */ 7804 if (compute_vecs) { 7805 PetscInt lrows; 7806 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7807 if (coarse_mat) { 7808 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7809 } else { 7810 lrows = 0; 7811 } 7812 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7813 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7814 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7815 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7816 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7817 } 7818 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7819 7820 /* set defaults for coarse KSP and PC */ 7821 if (multilevel_allowed) { 7822 coarse_ksp_type = KSPRICHARDSON; 7823 coarse_pc_type = PCBDDC; 7824 } else { 7825 coarse_ksp_type = KSPPREONLY; 7826 coarse_pc_type = PCREDUNDANT; 7827 } 7828 7829 /* print some info if requested */ 7830 if (pcbddc->dbg_flag) { 7831 if (!multilevel_allowed) { 7832 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7833 if (multilevel_requested) { 7834 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7835 } else if (pcbddc->max_levels) { 7836 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7837 } 7838 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7839 } 7840 } 7841 7842 /* communicate coarse discrete gradient */ 7843 coarseG = NULL; 7844 if (pcbddc->nedcG && multilevel_allowed) { 7845 MPI_Comm ccomm; 7846 if (coarse_mat) { 7847 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7848 } else { 7849 ccomm = MPI_COMM_NULL; 7850 } 7851 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7852 } 7853 7854 /* create the coarse KSP object only once with defaults */ 7855 if (coarse_mat) { 7856 PetscBool isredundant,isnn,isbddc; 7857 PetscViewer dbg_viewer = NULL; 7858 7859 if (pcbddc->dbg_flag) { 7860 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7861 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7862 } 7863 if (!pcbddc->coarse_ksp) { 7864 char prefix[256],str_level[16]; 7865 size_t len; 7866 7867 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7868 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7869 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7870 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7871 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7872 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7873 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7874 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7875 /* TODO is this logic correct? should check for coarse_mat type */ 7876 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7877 /* prefix */ 7878 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7879 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7880 if (!pcbddc->current_level) { 7881 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7882 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7883 } else { 7884 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7885 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7886 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7887 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7888 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7889 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7890 } 7891 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7892 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7893 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7894 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7895 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7896 /* allow user customization */ 7897 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7898 } 7899 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7900 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7901 if (nisdofs) { 7902 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7903 for (i=0;i<nisdofs;i++) { 7904 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7905 } 7906 } 7907 if (nisneu) { 7908 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7909 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7910 } 7911 if (nisvert) { 7912 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7913 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7914 } 7915 if (coarseG) { 7916 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7917 } 7918 7919 /* get some info after set from options */ 7920 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7921 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7922 if (isbddc && !multilevel_allowed) { 7923 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7924 isbddc = PETSC_FALSE; 7925 } 7926 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7927 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7928 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7929 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7930 isbddc = PETSC_TRUE; 7931 } 7932 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7933 if (isredundant) { 7934 KSP inner_ksp; 7935 PC inner_pc; 7936 7937 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7938 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7939 } 7940 7941 /* parameters which miss an API */ 7942 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7943 if (isbddc) { 7944 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7945 7946 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7947 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7948 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7949 if (pcbddc_coarse->benign_saddle_point) { 7950 Mat coarsedivudotp_is; 7951 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7952 IS row,col; 7953 const PetscInt *gidxs; 7954 PetscInt n,st,M,N; 7955 7956 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7957 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7958 st = st-n; 7959 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7960 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7961 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7962 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7963 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7964 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7965 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 7966 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 7967 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 7968 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 7969 ierr = ISDestroy(&row);CHKERRQ(ierr); 7970 ierr = ISDestroy(&col);CHKERRQ(ierr); 7971 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 7972 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 7973 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 7974 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 7975 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 7976 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 7977 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 7978 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 7979 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 7980 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 7981 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 7982 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 7983 } 7984 } 7985 7986 /* propagate symmetry info of coarse matrix */ 7987 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 7988 if (pc->pmat->symmetric_set) { 7989 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 7990 } 7991 if (pc->pmat->hermitian_set) { 7992 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 7993 } 7994 if (pc->pmat->spd_set) { 7995 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 7996 } 7997 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 7998 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 7999 } 8000 /* set operators */ 8001 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8002 if (pcbddc->dbg_flag) { 8003 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8004 } 8005 } 8006 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8007 ierr = PetscFree(isarray);CHKERRQ(ierr); 8008 #if 0 8009 { 8010 PetscViewer viewer; 8011 char filename[256]; 8012 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8013 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8014 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8015 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8016 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8017 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8018 } 8019 #endif 8020 8021 if (pcbddc->coarse_ksp) { 8022 Vec crhs,csol; 8023 8024 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8025 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8026 if (!csol) { 8027 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8028 } 8029 if (!crhs) { 8030 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8031 } 8032 } 8033 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8034 8035 /* compute null space for coarse solver if the benign trick has been requested */ 8036 if (pcbddc->benign_null) { 8037 8038 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8039 for (i=0;i<pcbddc->benign_n;i++) { 8040 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8041 } 8042 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8043 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8044 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8045 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8046 if (coarse_mat) { 8047 Vec nullv; 8048 PetscScalar *array,*array2; 8049 PetscInt nl; 8050 8051 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8052 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8053 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8054 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8055 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8056 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8057 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8058 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8059 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8060 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8061 } 8062 } 8063 8064 if (pcbddc->coarse_ksp) { 8065 PetscBool ispreonly; 8066 8067 if (CoarseNullSpace) { 8068 PetscBool isnull; 8069 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8070 if (isnull) { 8071 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8072 } 8073 /* TODO: add local nullspaces (if any) */ 8074 } 8075 /* setup coarse ksp */ 8076 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8077 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8078 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8079 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8080 KSP check_ksp; 8081 KSPType check_ksp_type; 8082 PC check_pc; 8083 Vec check_vec,coarse_vec; 8084 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8085 PetscInt its; 8086 PetscBool compute_eigs; 8087 PetscReal *eigs_r,*eigs_c; 8088 PetscInt neigs; 8089 const char *prefix; 8090 8091 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8092 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8093 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8094 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8095 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8096 /* prevent from setup unneeded object */ 8097 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8098 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8099 if (ispreonly) { 8100 check_ksp_type = KSPPREONLY; 8101 compute_eigs = PETSC_FALSE; 8102 } else { 8103 check_ksp_type = KSPGMRES; 8104 compute_eigs = PETSC_TRUE; 8105 } 8106 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8107 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8108 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8109 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8110 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8111 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8112 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8113 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8114 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8115 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8116 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8117 /* create random vec */ 8118 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8119 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8120 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8121 /* solve coarse problem */ 8122 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8123 /* set eigenvalue estimation if preonly has not been requested */ 8124 if (compute_eigs) { 8125 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8126 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8127 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8128 if (neigs) { 8129 lambda_max = eigs_r[neigs-1]; 8130 lambda_min = eigs_r[0]; 8131 if (pcbddc->use_coarse_estimates) { 8132 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8133 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8134 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8135 } 8136 } 8137 } 8138 } 8139 8140 /* check coarse problem residual error */ 8141 if (pcbddc->dbg_flag) { 8142 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8143 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8144 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8145 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8146 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8147 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8148 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8149 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8150 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8151 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8152 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8153 if (CoarseNullSpace) { 8154 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8155 } 8156 if (compute_eigs) { 8157 PetscReal lambda_max_s,lambda_min_s; 8158 KSPConvergedReason reason; 8159 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8160 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8161 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8162 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8163 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8164 for (i=0;i<neigs;i++) { 8165 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8166 } 8167 } 8168 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8169 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8170 } 8171 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8172 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8173 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8174 if (compute_eigs) { 8175 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8176 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8177 } 8178 } 8179 } 8180 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8181 /* print additional info */ 8182 if (pcbddc->dbg_flag) { 8183 /* waits until all processes reaches this point */ 8184 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8185 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8187 } 8188 8189 /* free memory */ 8190 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8191 PetscFunctionReturn(0); 8192 } 8193 8194 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8195 { 8196 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8197 PC_IS* pcis = (PC_IS*)pc->data; 8198 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8199 IS subset,subset_mult,subset_n; 8200 PetscInt local_size,coarse_size=0; 8201 PetscInt *local_primal_indices=NULL; 8202 const PetscInt *t_local_primal_indices; 8203 PetscErrorCode ierr; 8204 8205 PetscFunctionBegin; 8206 /* Compute global number of coarse dofs */ 8207 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8208 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8209 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8210 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8211 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8212 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8213 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8214 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8215 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8216 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8217 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8218 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8219 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8220 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8221 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8222 8223 /* check numbering */ 8224 if (pcbddc->dbg_flag) { 8225 PetscScalar coarsesum,*array,*array2; 8226 PetscInt i; 8227 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8228 8229 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8230 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8231 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8232 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8233 /* counter */ 8234 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8235 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8236 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8237 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8238 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8239 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8240 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8241 for (i=0;i<pcbddc->local_primal_size;i++) { 8242 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8243 } 8244 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8245 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8246 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8247 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8248 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8249 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8250 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8251 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8252 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8253 for (i=0;i<pcis->n;i++) { 8254 if (array[i] != 0.0 && array[i] != array2[i]) { 8255 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8256 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8257 set_error = PETSC_TRUE; 8258 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8259 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8260 } 8261 } 8262 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8263 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8264 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8265 for (i=0;i<pcis->n;i++) { 8266 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8267 } 8268 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8269 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8270 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8271 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8272 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8273 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8274 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8275 PetscInt *gidxs; 8276 8277 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8278 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8279 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8280 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8281 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8282 for (i=0;i<pcbddc->local_primal_size;i++) { 8283 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8284 } 8285 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8286 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8287 } 8288 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8289 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8290 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8291 } 8292 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8293 /* get back data */ 8294 *coarse_size_n = coarse_size; 8295 *local_primal_indices_n = local_primal_indices; 8296 PetscFunctionReturn(0); 8297 } 8298 8299 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8300 { 8301 IS localis_t; 8302 PetscInt i,lsize,*idxs,n; 8303 PetscScalar *vals; 8304 PetscErrorCode ierr; 8305 8306 PetscFunctionBegin; 8307 /* get indices in local ordering exploiting local to global map */ 8308 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8309 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8310 for (i=0;i<lsize;i++) vals[i] = 1.0; 8311 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8312 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8313 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8314 if (idxs) { /* multilevel guard */ 8315 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8316 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8317 } 8318 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8319 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8320 ierr = PetscFree(vals);CHKERRQ(ierr); 8321 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8322 /* now compute set in local ordering */ 8323 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8324 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8325 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8326 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8327 for (i=0,lsize=0;i<n;i++) { 8328 if (PetscRealPart(vals[i]) > 0.5) { 8329 lsize++; 8330 } 8331 } 8332 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8333 for (i=0,lsize=0;i<n;i++) { 8334 if (PetscRealPart(vals[i]) > 0.5) { 8335 idxs[lsize++] = i; 8336 } 8337 } 8338 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8339 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8340 *localis = localis_t; 8341 PetscFunctionReturn(0); 8342 } 8343 8344 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8345 { 8346 PC_IS *pcis=(PC_IS*)pc->data; 8347 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8348 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8349 Mat S_j; 8350 PetscInt *used_xadj,*used_adjncy; 8351 PetscBool free_used_adj; 8352 PetscErrorCode ierr; 8353 8354 PetscFunctionBegin; 8355 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8356 free_used_adj = PETSC_FALSE; 8357 if (pcbddc->sub_schurs_layers == -1) { 8358 used_xadj = NULL; 8359 used_adjncy = NULL; 8360 } else { 8361 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8362 used_xadj = pcbddc->mat_graph->xadj; 8363 used_adjncy = pcbddc->mat_graph->adjncy; 8364 } else if (pcbddc->computed_rowadj) { 8365 used_xadj = pcbddc->mat_graph->xadj; 8366 used_adjncy = pcbddc->mat_graph->adjncy; 8367 } else { 8368 PetscBool flg_row=PETSC_FALSE; 8369 const PetscInt *xadj,*adjncy; 8370 PetscInt nvtxs; 8371 8372 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8373 if (flg_row) { 8374 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8375 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8376 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8377 free_used_adj = PETSC_TRUE; 8378 } else { 8379 pcbddc->sub_schurs_layers = -1; 8380 used_xadj = NULL; 8381 used_adjncy = NULL; 8382 } 8383 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8384 } 8385 } 8386 8387 /* setup sub_schurs data */ 8388 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8389 if (!sub_schurs->schur_explicit) { 8390 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8391 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8392 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8393 } else { 8394 Mat change = NULL; 8395 Vec scaling = NULL; 8396 IS change_primal = NULL, iP; 8397 PetscInt benign_n; 8398 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8399 PetscBool isseqaij,need_change = PETSC_FALSE; 8400 PetscBool discrete_harmonic = PETSC_FALSE; 8401 8402 if (!pcbddc->use_vertices && reuse_solvers) { 8403 PetscInt n_vertices; 8404 8405 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8406 reuse_solvers = (PetscBool)!n_vertices; 8407 } 8408 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8409 if (!isseqaij) { 8410 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8411 if (matis->A == pcbddc->local_mat) { 8412 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8413 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8414 } else { 8415 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8416 } 8417 } 8418 if (!pcbddc->benign_change_explicit) { 8419 benign_n = pcbddc->benign_n; 8420 } else { 8421 benign_n = 0; 8422 } 8423 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8424 We need a global reduction to avoid possible deadlocks. 8425 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8426 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8427 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8428 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8429 need_change = (PetscBool)(!need_change); 8430 } 8431 /* If the user defines additional constraints, we import them here. 8432 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8433 if (need_change) { 8434 PC_IS *pcisf; 8435 PC_BDDC *pcbddcf; 8436 PC pcf; 8437 8438 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8439 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8440 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8441 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8442 8443 /* hacks */ 8444 pcisf = (PC_IS*)pcf->data; 8445 pcisf->is_B_local = pcis->is_B_local; 8446 pcisf->vec1_N = pcis->vec1_N; 8447 pcisf->BtoNmap = pcis->BtoNmap; 8448 pcisf->n = pcis->n; 8449 pcisf->n_B = pcis->n_B; 8450 pcbddcf = (PC_BDDC*)pcf->data; 8451 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8452 pcbddcf->mat_graph = pcbddc->mat_graph; 8453 pcbddcf->use_faces = PETSC_TRUE; 8454 pcbddcf->use_change_of_basis = PETSC_TRUE; 8455 pcbddcf->use_change_on_faces = PETSC_TRUE; 8456 pcbddcf->use_qr_single = PETSC_TRUE; 8457 pcbddcf->fake_change = PETSC_TRUE; 8458 8459 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8460 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8461 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8462 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8463 change = pcbddcf->ConstraintMatrix; 8464 pcbddcf->ConstraintMatrix = NULL; 8465 8466 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8467 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8468 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8469 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8470 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8471 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8472 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8473 pcf->ops->destroy = NULL; 8474 pcf->ops->reset = NULL; 8475 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8476 } 8477 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8478 8479 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8480 if (iP) { 8481 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8482 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8483 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8484 } 8485 if (discrete_harmonic) { 8486 Mat A; 8487 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8488 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8489 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8490 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8491 ierr = MatDestroy(&A);CHKERRQ(ierr); 8492 } else { 8493 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8494 } 8495 ierr = MatDestroy(&change);CHKERRQ(ierr); 8496 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8497 } 8498 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8499 8500 /* free adjacency */ 8501 if (free_used_adj) { 8502 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8503 } 8504 PetscFunctionReturn(0); 8505 } 8506 8507 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8508 { 8509 PC_IS *pcis=(PC_IS*)pc->data; 8510 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8511 PCBDDCGraph graph; 8512 PetscErrorCode ierr; 8513 8514 PetscFunctionBegin; 8515 /* attach interface graph for determining subsets */ 8516 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8517 IS verticesIS,verticescomm; 8518 PetscInt vsize,*idxs; 8519 8520 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8521 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8522 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8523 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8524 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8525 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8526 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8527 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8528 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8529 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8530 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8531 } else { 8532 graph = pcbddc->mat_graph; 8533 } 8534 /* print some info */ 8535 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8536 IS vertices; 8537 PetscInt nv,nedges,nfaces; 8538 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8539 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8540 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8541 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8542 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8543 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8544 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8545 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8546 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8547 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8548 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8549 } 8550 8551 /* sub_schurs init */ 8552 if (!pcbddc->sub_schurs) { 8553 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8554 } 8555 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,((PetscObject)pc)->prefix,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8556 8557 /* free graph struct */ 8558 if (pcbddc->sub_schurs_rebuild) { 8559 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8560 } 8561 PetscFunctionReturn(0); 8562 } 8563 8564 PetscErrorCode PCBDDCCheckOperator(PC pc) 8565 { 8566 PC_IS *pcis=(PC_IS*)pc->data; 8567 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8568 PetscErrorCode ierr; 8569 8570 PetscFunctionBegin; 8571 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8572 IS zerodiag = NULL; 8573 Mat S_j,B0_B=NULL; 8574 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8575 PetscScalar *p0_check,*array,*array2; 8576 PetscReal norm; 8577 PetscInt i; 8578 8579 /* B0 and B0_B */ 8580 if (zerodiag) { 8581 IS dummy; 8582 8583 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8584 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8585 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8586 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8587 } 8588 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8589 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8590 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8591 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8592 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8593 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8594 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8595 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8596 /* S_j */ 8597 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8598 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8599 8600 /* mimic vector in \widetilde{W}_\Gamma */ 8601 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8602 /* continuous in primal space */ 8603 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8604 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8605 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8606 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8607 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8608 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8609 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8610 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8611 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8612 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8613 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8614 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8615 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8616 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8617 8618 /* assemble rhs for coarse problem */ 8619 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8620 /* local with Schur */ 8621 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8622 if (zerodiag) { 8623 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8624 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8625 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8626 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8627 } 8628 /* sum on primal nodes the local contributions */ 8629 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8630 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8631 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8632 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8633 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8634 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8635 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8636 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8637 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8638 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8639 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8640 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8641 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8642 /* scale primal nodes (BDDC sums contibutions) */ 8643 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8644 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8645 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8646 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8647 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8648 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8649 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8650 /* global: \widetilde{B0}_B w_\Gamma */ 8651 if (zerodiag) { 8652 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8653 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8654 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8655 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8656 } 8657 /* BDDC */ 8658 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8659 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8660 8661 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8662 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8663 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8664 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8665 for (i=0;i<pcbddc->benign_n;i++) { 8666 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8667 } 8668 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8669 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8670 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8671 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8672 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8673 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8674 } 8675 PetscFunctionReturn(0); 8676 } 8677 8678 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8679 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8680 { 8681 Mat At; 8682 IS rows; 8683 PetscInt rst,ren; 8684 PetscErrorCode ierr; 8685 PetscLayout rmap; 8686 8687 PetscFunctionBegin; 8688 rst = ren = 0; 8689 if (ccomm != MPI_COMM_NULL) { 8690 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8691 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8692 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8693 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8694 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8695 } 8696 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8697 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8698 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8699 8700 if (ccomm != MPI_COMM_NULL) { 8701 Mat_MPIAIJ *a,*b; 8702 IS from,to; 8703 Vec gvec; 8704 PetscInt lsize; 8705 8706 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8707 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8708 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8709 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8710 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8711 a = (Mat_MPIAIJ*)At->data; 8712 b = (Mat_MPIAIJ*)(*B)->data; 8713 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8714 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8715 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8716 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8717 b->A = a->A; 8718 b->B = a->B; 8719 8720 b->donotstash = a->donotstash; 8721 b->roworiented = a->roworiented; 8722 b->rowindices = 0; 8723 b->rowvalues = 0; 8724 b->getrowactive = PETSC_FALSE; 8725 8726 (*B)->rmap = rmap; 8727 (*B)->factortype = A->factortype; 8728 (*B)->assembled = PETSC_TRUE; 8729 (*B)->insertmode = NOT_SET_VALUES; 8730 (*B)->preallocated = PETSC_TRUE; 8731 8732 if (a->colmap) { 8733 #if defined(PETSC_USE_CTABLE) 8734 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8735 #else 8736 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8737 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8738 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8739 #endif 8740 } else b->colmap = 0; 8741 if (a->garray) { 8742 PetscInt len; 8743 len = a->B->cmap->n; 8744 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8745 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8746 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8747 } else b->garray = 0; 8748 8749 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8750 b->lvec = a->lvec; 8751 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8752 8753 /* cannot use VecScatterCopy */ 8754 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8755 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8756 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8757 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8758 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8759 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8760 ierr = ISDestroy(&from);CHKERRQ(ierr); 8761 ierr = ISDestroy(&to);CHKERRQ(ierr); 8762 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8763 } 8764 ierr = MatDestroy(&At);CHKERRQ(ierr); 8765 PetscFunctionReturn(0); 8766 } 8767