1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <../src/mat/impls/dense/seq/dense.h> 5 #include <petscdmplex.h> 6 #include <petscblaslapack.h> 7 #include <petsc/private/sfimpl.h> 8 #include <petsc/private/dmpleximpl.h> 9 #include <petscdmda.h> 10 11 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 12 13 /* if range is true, it returns B s.t. span{B} = range(A) 14 if range is false, it returns B s.t. range(B) _|_ range(A) */ 15 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 16 { 17 #if !defined(PETSC_USE_COMPLEX) 18 PetscScalar *uwork,*data,*U, ds = 0.; 19 PetscReal *sing; 20 PetscBLASInt bM,bN,lwork,lierr,di = 1; 21 PetscInt ulw,i,nr,nc,n; 22 PetscErrorCode ierr; 23 24 PetscFunctionBegin; 25 #if defined(PETSC_MISSING_LAPACK_GESVD) 26 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 27 #else 28 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 29 if (!nr || !nc) PetscFunctionReturn(0); 30 31 /* workspace */ 32 if (!work) { 33 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 34 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 35 } else { 36 ulw = lw; 37 uwork = work; 38 } 39 n = PetscMin(nr,nc); 40 if (!rwork) { 41 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 42 } else { 43 sing = rwork; 44 } 45 46 /* SVD */ 47 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 50 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 51 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 52 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 53 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 54 ierr = PetscFPTrapPop();CHKERRQ(ierr); 55 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 56 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 57 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 58 if (!rwork) { 59 ierr = PetscFree(sing);CHKERRQ(ierr); 60 } 61 if (!work) { 62 ierr = PetscFree(uwork);CHKERRQ(ierr); 63 } 64 /* create B */ 65 if (!range) { 66 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 67 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 68 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 69 } else { 70 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 71 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 72 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 73 } 74 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 75 ierr = PetscFree(U);CHKERRQ(ierr); 76 #endif 77 #else /* PETSC_USE_COMPLEX */ 78 PetscFunctionBegin; 79 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 80 #endif 81 PetscFunctionReturn(0); 82 } 83 84 /* TODO REMOVE */ 85 #if defined(PRINT_GDET) 86 static int inc = 0; 87 static int lev = 0; 88 #endif 89 90 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 91 { 92 PetscErrorCode ierr; 93 Mat GE,GEd; 94 PetscInt rsize,csize,esize; 95 PetscScalar *ptr; 96 97 PetscFunctionBegin; 98 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 99 if (!esize) PetscFunctionReturn(0); 100 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 101 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 102 103 /* gradients */ 104 ptr = work + 5*esize; 105 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 106 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 107 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 108 ierr = MatDestroy(&GE);CHKERRQ(ierr); 109 110 /* constants */ 111 ptr += rsize*csize; 112 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 113 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 114 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 115 ierr = MatDestroy(&GE);CHKERRQ(ierr); 116 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 117 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 118 119 if (corners) { 120 Mat GEc; 121 PetscScalar *vals,v; 122 123 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 124 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 125 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 126 /* v = PetscAbsScalar(vals[0]) */; 127 v = 1.; 128 cvals[0] = vals[0]/v; 129 cvals[1] = vals[1]/v; 130 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 131 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 132 #if defined(PRINT_GDET) 133 { 134 PetscViewer viewer; 135 char filename[256]; 136 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 137 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 138 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 139 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 140 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 141 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 142 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 143 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 144 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 145 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 146 } 147 #endif 148 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 149 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 150 } 151 152 PetscFunctionReturn(0); 153 } 154 155 PetscErrorCode PCBDDCNedelecSupport(PC pc) 156 { 157 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 158 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 159 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 160 Vec tvec; 161 PetscSF sfv; 162 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 163 MPI_Comm comm; 164 IS lned,primals,allprimals,nedfieldlocal; 165 IS *eedges,*extrows,*extcols,*alleedges; 166 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 167 PetscScalar *vals,*work; 168 PetscReal *rwork; 169 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 170 PetscInt ne,nv,Lv,order,n,field; 171 PetscInt n_neigh,*neigh,*n_shared,**shared; 172 PetscInt i,j,extmem,cum,maxsize,nee; 173 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 174 PetscInt *sfvleaves,*sfvroots; 175 PetscInt *corners,*cedges; 176 PetscInt *ecount,**eneighs,*vcount,**vneighs; 177 #if defined(PETSC_USE_DEBUG) 178 PetscInt *emarks; 179 #endif 180 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 181 PetscErrorCode ierr; 182 183 PetscFunctionBegin; 184 /* If the discrete gradient is defined for a subset of dofs and global is true, 185 it assumes G is given in global ordering for all the dofs. 186 Otherwise, the ordering is global for the Nedelec field */ 187 order = pcbddc->nedorder; 188 conforming = pcbddc->conforming; 189 field = pcbddc->nedfield; 190 global = pcbddc->nedglobal; 191 setprimal = PETSC_FALSE; 192 print = PETSC_FALSE; 193 singular = PETSC_FALSE; 194 195 /* Command line customization */ 196 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 199 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 200 /* print debug info TODO: to be removed */ 201 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 202 ierr = PetscOptionsEnd();CHKERRQ(ierr); 203 204 /* Return if there are no edges in the decomposition and the problem is not singular */ 205 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 206 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 207 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 208 if (!singular) { 209 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 210 lrc[0] = PETSC_FALSE; 211 for (i=0;i<n;i++) { 212 if (PetscRealPart(vals[i]) > 2.) { 213 lrc[0] = PETSC_TRUE; 214 break; 215 } 216 } 217 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 218 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 219 if (!lrc[1]) PetscFunctionReturn(0); 220 } 221 222 /* Get Nedelec field */ 223 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 224 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 225 if (pcbddc->n_ISForDofsLocal && field >= 0) { 226 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 227 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 228 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 229 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 230 ne = n; 231 nedfieldlocal = NULL; 232 global = PETSC_TRUE; 233 } else if (field == PETSC_DECIDE) { 234 PetscInt rst,ren,*idx; 235 236 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 238 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 239 for (i=rst;i<ren;i++) { 240 PetscInt nc; 241 242 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 243 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 244 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 245 } 246 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 248 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 249 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 250 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 251 } else { 252 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 253 } 254 255 /* Sanity checks */ 256 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 257 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 258 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 259 260 /* Just set primal dofs and return */ 261 if (setprimal) { 262 IS enedfieldlocal; 263 PetscInt *eidxs; 264 265 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 266 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 267 if (nedfieldlocal) { 268 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 269 for (i=0,cum=0;i<ne;i++) { 270 if (PetscRealPart(vals[idxs[i]]) > 2.) { 271 eidxs[cum++] = idxs[i]; 272 } 273 } 274 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 275 } else { 276 for (i=0,cum=0;i<ne;i++) { 277 if (PetscRealPart(vals[i]) > 2.) { 278 eidxs[cum++] = i; 279 } 280 } 281 } 282 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 283 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 284 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 285 ierr = PetscFree(eidxs);CHKERRQ(ierr); 286 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 287 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 288 PetscFunctionReturn(0); 289 } 290 291 /* Compute some l2g maps */ 292 if (nedfieldlocal) { 293 IS is; 294 295 /* need to map from the local Nedelec field to local numbering */ 296 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 297 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 298 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 299 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 300 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 301 if (global) { 302 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 303 el2g = al2g; 304 } else { 305 IS gis; 306 307 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 308 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 309 ierr = ISDestroy(&gis);CHKERRQ(ierr); 310 } 311 ierr = ISDestroy(&is);CHKERRQ(ierr); 312 } else { 313 /* restore default */ 314 pcbddc->nedfield = -1; 315 /* one ref for the destruction of al2g, one for el2g */ 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 318 el2g = al2g; 319 fl2g = NULL; 320 } 321 322 /* Start communication to drop connections for interior edges (for cc analysis only) */ 323 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 324 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 325 if (nedfieldlocal) { 326 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 327 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 328 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 329 } else { 330 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 331 } 332 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 334 335 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 336 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 337 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 338 if (global) { 339 PetscInt rst; 340 341 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 342 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 343 if (matis->sf_rootdata[i] < 2) { 344 matis->sf_rootdata[cum++] = i + rst; 345 } 346 } 347 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 348 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 349 } else { 350 PetscInt *tbz; 351 352 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 353 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 355 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 356 for (i=0,cum=0;i<ne;i++) 357 if (matis->sf_leafdata[idxs[i]] == 1) 358 tbz[cum++] = i; 359 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 360 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 361 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 362 ierr = PetscFree(tbz);CHKERRQ(ierr); 363 } 364 } else { /* we need the entire G to infer the nullspace */ 365 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 366 G = pcbddc->discretegradient; 367 } 368 369 /* Extract subdomain relevant rows of G */ 370 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 371 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 372 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 373 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 374 ierr = ISDestroy(&lned);CHKERRQ(ierr); 375 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 376 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 377 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 378 379 /* SF for nodal dofs communications */ 380 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 381 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 382 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 383 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 384 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 385 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 386 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 387 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 388 i = singular ? 2 : 1; 389 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 390 391 /* Destroy temporary G created in MATIS format and modified G */ 392 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 393 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 394 ierr = MatDestroy(&G);CHKERRQ(ierr); 395 396 if (print) { 397 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 398 ierr = MatView(lG,NULL);CHKERRQ(ierr); 399 } 400 401 /* Save lG for values insertion in change of basis */ 402 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 403 404 /* Analyze the edge-nodes connections (duplicate lG) */ 405 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 406 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 407 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 410 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 411 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 412 /* need to import the boundary specification to ensure the 413 proper detection of coarse edges' endpoints */ 414 if (pcbddc->DirichletBoundariesLocal) { 415 IS is; 416 417 if (fl2g) { 418 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 419 } else { 420 is = pcbddc->DirichletBoundariesLocal; 421 } 422 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 423 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 424 for (i=0;i<cum;i++) { 425 if (idxs[i] >= 0) { 426 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 427 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 428 } 429 } 430 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 431 if (fl2g) { 432 ierr = ISDestroy(&is);CHKERRQ(ierr); 433 } 434 } 435 if (pcbddc->NeumannBoundariesLocal) { 436 IS is; 437 438 if (fl2g) { 439 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 440 } else { 441 is = pcbddc->NeumannBoundariesLocal; 442 } 443 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 444 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 445 for (i=0;i<cum;i++) { 446 if (idxs[i] >= 0) { 447 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 448 } 449 } 450 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 451 if (fl2g) { 452 ierr = ISDestroy(&is);CHKERRQ(ierr); 453 } 454 } 455 456 /* Count neighs per dof */ 457 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 458 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 459 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 460 for (i=1,cum=0;i<n_neigh;i++) { 461 cum += n_shared[i]; 462 for (j=0;j<n_shared[i];j++) { 463 ecount[shared[i][j]]++; 464 } 465 } 466 if (ne) { 467 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 468 } 469 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 470 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 471 for (i=1;i<n_neigh;i++) { 472 for (j=0;j<n_shared[i];j++) { 473 PetscInt k = shared[i][j]; 474 eneighs[k][ecount[k]] = neigh[i]; 475 ecount[k]++; 476 } 477 } 478 for (i=0;i<ne;i++) { 479 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 480 } 481 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 482 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 483 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 484 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 485 for (i=1,cum=0;i<n_neigh;i++) { 486 cum += n_shared[i]; 487 for (j=0;j<n_shared[i];j++) { 488 vcount[shared[i][j]]++; 489 } 490 } 491 if (nv) { 492 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 493 } 494 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 495 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 496 for (i=1;i<n_neigh;i++) { 497 for (j=0;j<n_shared[i];j++) { 498 PetscInt k = shared[i][j]; 499 vneighs[k][vcount[k]] = neigh[i]; 500 vcount[k]++; 501 } 502 } 503 for (i=0;i<nv;i++) { 504 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 505 } 506 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 507 508 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 509 for proper detection of coarse edges' endpoints */ 510 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 511 for (i=0;i<ne;i++) { 512 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 513 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 514 } 515 } 516 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 517 if (!conforming) { 518 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 519 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 520 } 521 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 522 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 523 cum = 0; 524 for (i=0;i<ne;i++) { 525 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 526 if (!PetscBTLookup(btee,i)) { 527 marks[cum++] = i; 528 continue; 529 } 530 /* set badly connected edge dofs as primal */ 531 if (!conforming) { 532 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 533 marks[cum++] = i; 534 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 535 for (j=ii[i];j<ii[i+1];j++) { 536 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 537 } 538 } else { 539 /* every edge dofs should be connected trough a certain number of nodal dofs 540 to other edge dofs belonging to coarse edges 541 - at most 2 endpoints 542 - order-1 interior nodal dofs 543 - no undefined nodal dofs (nconn < order) 544 */ 545 PetscInt ends = 0,ints = 0, undef = 0; 546 for (j=ii[i];j<ii[i+1];j++) { 547 PetscInt v = jj[j],k; 548 PetscInt nconn = iit[v+1]-iit[v]; 549 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 550 if (nconn > order) ends++; 551 else if (nconn == order) ints++; 552 else undef++; 553 } 554 if (undef || ends > 2 || ints != order -1) { 555 marks[cum++] = i; 556 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 557 for (j=ii[i];j<ii[i+1];j++) { 558 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 559 } 560 } 561 } 562 } 563 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 564 if (!order && ii[i+1] != ii[i]) { 565 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 566 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 567 } 568 } 569 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 570 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 571 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 572 if (!conforming) { 573 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 574 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 575 } 576 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 577 578 /* identify splitpoints and corner candidates */ 579 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 580 if (print) { 581 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 582 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 583 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 584 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 585 } 586 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 587 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 588 for (i=0;i<nv;i++) { 589 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 590 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 591 if (!order) { /* variable order */ 592 PetscReal vorder = 0.; 593 594 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 595 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 596 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 597 ord = 1; 598 } 599 #if defined(PETSC_USE_DEBUG) 600 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 601 #endif 602 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 603 if (PetscBTLookup(btbd,jj[j])) { 604 bdir = PETSC_TRUE; 605 break; 606 } 607 if (vc != ecount[jj[j]]) { 608 sneighs = PETSC_FALSE; 609 } else { 610 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 611 for (k=0;k<vc;k++) { 612 if (vn[k] != en[k]) { 613 sneighs = PETSC_FALSE; 614 break; 615 } 616 } 617 } 618 } 619 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 620 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 621 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 622 } else if (test == ord) { 623 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 624 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 625 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 626 } else { 627 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 628 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 629 } 630 } 631 } 632 ierr = PetscFree(ecount);CHKERRQ(ierr); 633 ierr = PetscFree(vcount);CHKERRQ(ierr); 634 if (ne) { 635 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 636 } 637 if (nv) { 638 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 639 } 640 ierr = PetscFree(eneighs);CHKERRQ(ierr); 641 ierr = PetscFree(vneighs);CHKERRQ(ierr); 642 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 643 644 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 645 if (order != 1) { 646 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 647 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 648 for (i=0;i<nv;i++) { 649 if (PetscBTLookup(btvcand,i)) { 650 PetscBool found = PETSC_FALSE; 651 for (j=ii[i];j<ii[i+1] && !found;j++) { 652 PetscInt k,e = jj[j]; 653 if (PetscBTLookup(bte,e)) continue; 654 for (k=iit[e];k<iit[e+1];k++) { 655 PetscInt v = jjt[k]; 656 if (v != i && PetscBTLookup(btvcand,v)) { 657 found = PETSC_TRUE; 658 break; 659 } 660 } 661 } 662 if (!found) { 663 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 664 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 665 } else { 666 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 667 } 668 } 669 } 670 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 671 } 672 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 673 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 674 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 675 676 /* Get the local G^T explicitly */ 677 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 678 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 679 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 680 681 /* Mark interior nodal dofs */ 682 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 683 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 684 for (i=1;i<n_neigh;i++) { 685 for (j=0;j<n_shared[i];j++) { 686 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 687 } 688 } 689 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 690 691 /* communicate corners and splitpoints */ 692 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 694 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 695 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 696 697 if (print) { 698 IS tbz; 699 700 cum = 0; 701 for (i=0;i<nv;i++) 702 if (sfvleaves[i]) 703 vmarks[cum++] = i; 704 705 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 706 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 707 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 708 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 709 } 710 711 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 713 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 715 716 /* Zero rows of lGt corresponding to identified corners 717 and interior nodal dofs */ 718 cum = 0; 719 for (i=0;i<nv;i++) { 720 if (sfvleaves[i]) { 721 vmarks[cum++] = i; 722 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 723 } 724 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 725 } 726 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 727 if (print) { 728 IS tbz; 729 730 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 731 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 732 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 733 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 734 } 735 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 736 ierr = PetscFree(vmarks);CHKERRQ(ierr); 737 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 738 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 739 740 /* Recompute G */ 741 ierr = MatDestroy(&lG);CHKERRQ(ierr); 742 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 743 if (print) { 744 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 745 ierr = MatView(lG,NULL);CHKERRQ(ierr); 746 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 747 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 748 } 749 750 /* Get primal dofs (if any) */ 751 cum = 0; 752 for (i=0;i<ne;i++) { 753 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 754 } 755 if (fl2g) { 756 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 757 } 758 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 759 if (print) { 760 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 761 ierr = ISView(primals,NULL);CHKERRQ(ierr); 762 } 763 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 764 /* TODO: what if the user passed in some of them ? */ 765 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 766 ierr = ISDestroy(&primals);CHKERRQ(ierr); 767 768 /* Compute edge connectivity */ 769 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 770 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 771 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 772 if (fl2g) { 773 PetscBT btf; 774 PetscInt *iia,*jja,*iiu,*jju; 775 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 776 777 /* create CSR for all local dofs */ 778 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 779 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 780 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 781 iiu = pcbddc->mat_graph->xadj; 782 jju = pcbddc->mat_graph->adjncy; 783 } else if (pcbddc->use_local_adj) { 784 rest = PETSC_TRUE; 785 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 786 } else { 787 free = PETSC_TRUE; 788 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 789 iiu[0] = 0; 790 for (i=0;i<n;i++) { 791 iiu[i+1] = i+1; 792 jju[i] = -1; 793 } 794 } 795 796 /* import sizes of CSR */ 797 iia[0] = 0; 798 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 799 800 /* overwrite entries corresponding to the Nedelec field */ 801 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 802 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 803 for (i=0;i<ne;i++) { 804 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 805 iia[idxs[i]+1] = ii[i+1]-ii[i]; 806 } 807 808 /* iia in CSR */ 809 for (i=0;i<n;i++) iia[i+1] += iia[i]; 810 811 /* jja in CSR */ 812 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 813 for (i=0;i<n;i++) 814 if (!PetscBTLookup(btf,i)) 815 for (j=0;j<iiu[i+1]-iiu[i];j++) 816 jja[iia[i]+j] = jju[iiu[i]+j]; 817 818 /* map edge dofs connectivity */ 819 if (jj) { 820 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 821 for (i=0;i<ne;i++) { 822 PetscInt e = idxs[i]; 823 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 824 } 825 } 826 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 827 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 828 if (rest) { 829 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 830 } 831 if (free) { 832 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 833 } 834 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 835 } else { 836 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 837 } 838 839 /* Analyze interface for edge dofs */ 840 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 841 pcbddc->mat_graph->twodim = PETSC_FALSE; 842 843 /* Get coarse edges in the edge space */ 844 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 845 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 846 847 if (fl2g) { 848 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 849 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 850 for (i=0;i<nee;i++) { 851 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 852 } 853 } else { 854 eedges = alleedges; 855 primals = allprimals; 856 } 857 858 /* Mark fine edge dofs with their coarse edge id */ 859 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 860 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 861 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 862 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 863 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 864 if (print) { 865 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 866 ierr = ISView(primals,NULL);CHKERRQ(ierr); 867 } 868 869 maxsize = 0; 870 for (i=0;i<nee;i++) { 871 PetscInt size,mark = i+1; 872 873 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 874 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 875 for (j=0;j<size;j++) marks[idxs[j]] = mark; 876 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 877 maxsize = PetscMax(maxsize,size); 878 } 879 880 /* Find coarse edge endpoints */ 881 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 882 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 883 for (i=0;i<nee;i++) { 884 PetscInt mark = i+1,size; 885 886 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 887 if (!size && nedfieldlocal) continue; 888 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 889 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 890 if (print) { 891 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 892 ISView(eedges[i],NULL); 893 } 894 for (j=0;j<size;j++) { 895 PetscInt k, ee = idxs[j]; 896 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 897 for (k=ii[ee];k<ii[ee+1];k++) { 898 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 899 if (PetscBTLookup(btv,jj[k])) { 900 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 901 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 902 PetscInt k2; 903 PetscBool corner = PETSC_FALSE; 904 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 905 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 906 /* it's a corner if either is connected with an edge dof belonging to a different cc or 907 if the edge dof lie on the natural part of the boundary */ 908 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 909 corner = PETSC_TRUE; 910 break; 911 } 912 } 913 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 914 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 915 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 916 } else { 917 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 918 } 919 } 920 } 921 } 922 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 923 } 924 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 925 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 926 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 927 928 /* Reset marked primal dofs */ 929 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 930 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 931 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 932 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 933 934 /* Now use the initial lG */ 935 ierr = MatDestroy(&lG);CHKERRQ(ierr); 936 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 937 lG = lGinit; 938 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 939 940 /* Compute extended cols indices */ 941 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 942 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 943 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 944 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 945 i *= maxsize; 946 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 947 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 948 eerr = PETSC_FALSE; 949 for (i=0;i<nee;i++) { 950 PetscInt size,found = 0; 951 952 cum = 0; 953 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 954 if (!size && nedfieldlocal) continue; 955 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 956 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 957 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 958 for (j=0;j<size;j++) { 959 PetscInt k,ee = idxs[j]; 960 for (k=ii[ee];k<ii[ee+1];k++) { 961 PetscInt vv = jj[k]; 962 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 963 else if (!PetscBTLookupSet(btvc,vv)) found++; 964 } 965 } 966 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 967 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 968 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 969 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 970 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 971 /* it may happen that endpoints are not defined at this point 972 if it is the case, mark this edge for a second pass */ 973 if (cum != size -1 || found != 2) { 974 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 975 if (print) { 976 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 977 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 978 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 979 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 980 } 981 eerr = PETSC_TRUE; 982 } 983 } 984 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 985 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 986 if (done) { 987 PetscInt *newprimals; 988 989 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 990 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 991 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 992 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 993 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 994 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 995 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 996 for (i=0;i<nee;i++) { 997 PetscBool has_candidates = PETSC_FALSE; 998 if (PetscBTLookup(bter,i)) { 999 PetscInt size,mark = i+1; 1000 1001 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1002 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1003 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1004 for (j=0;j<size;j++) { 1005 PetscInt k,ee = idxs[j]; 1006 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1007 for (k=ii[ee];k<ii[ee+1];k++) { 1008 /* set all candidates located on the edge as corners */ 1009 if (PetscBTLookup(btvcand,jj[k])) { 1010 PetscInt k2,vv = jj[k]; 1011 has_candidates = PETSC_TRUE; 1012 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1013 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1014 /* set all edge dofs connected to candidate as primals */ 1015 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1016 if (marks[jjt[k2]] == mark) { 1017 PetscInt k3,ee2 = jjt[k2]; 1018 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1019 newprimals[cum++] = ee2; 1020 /* finally set the new corners */ 1021 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1022 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1023 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1024 } 1025 } 1026 } 1027 } else { 1028 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1029 } 1030 } 1031 } 1032 if (!has_candidates) { /* circular edge */ 1033 PetscInt k, ee = idxs[0],*tmarks; 1034 1035 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1036 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1037 for (k=ii[ee];k<ii[ee+1];k++) { 1038 PetscInt k2; 1039 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1040 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1041 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1042 } 1043 for (j=0;j<size;j++) { 1044 if (tmarks[idxs[j]] > 1) { 1045 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1046 newprimals[cum++] = idxs[j]; 1047 } 1048 } 1049 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1050 } 1051 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1052 } 1053 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1054 } 1055 ierr = PetscFree(extcols);CHKERRQ(ierr); 1056 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1057 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1058 if (fl2g) { 1059 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1060 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1061 for (i=0;i<nee;i++) { 1062 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1063 } 1064 ierr = PetscFree(eedges);CHKERRQ(ierr); 1065 } 1066 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1067 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1068 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1069 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1070 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1071 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1072 pcbddc->mat_graph->twodim = PETSC_FALSE; 1073 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1074 if (fl2g) { 1075 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1076 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1077 for (i=0;i<nee;i++) { 1078 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1079 } 1080 } else { 1081 eedges = alleedges; 1082 primals = allprimals; 1083 } 1084 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1085 1086 /* Mark again */ 1087 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1088 for (i=0;i<nee;i++) { 1089 PetscInt size,mark = i+1; 1090 1091 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1092 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1093 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1094 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1095 } 1096 if (print) { 1097 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1098 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1099 } 1100 1101 /* Recompute extended cols */ 1102 eerr = PETSC_FALSE; 1103 for (i=0;i<nee;i++) { 1104 PetscInt size; 1105 1106 cum = 0; 1107 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1108 if (!size && nedfieldlocal) continue; 1109 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1110 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1111 for (j=0;j<size;j++) { 1112 PetscInt k,ee = idxs[j]; 1113 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1114 } 1115 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1116 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1117 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1118 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1119 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1120 if (cum != size -1) { 1121 if (print) { 1122 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1123 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1124 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1125 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1126 } 1127 eerr = PETSC_TRUE; 1128 } 1129 } 1130 } 1131 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1132 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1133 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1134 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1135 /* an error should not occur at this point */ 1136 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1137 1138 /* Check the number of endpoints */ 1139 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1141 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1142 for (i=0;i<nee;i++) { 1143 PetscInt size, found = 0, gc[2]; 1144 1145 /* init with defaults */ 1146 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1147 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1148 if (!size && nedfieldlocal) continue; 1149 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1150 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1151 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1152 for (j=0;j<size;j++) { 1153 PetscInt k,ee = idxs[j]; 1154 for (k=ii[ee];k<ii[ee+1];k++) { 1155 PetscInt vv = jj[k]; 1156 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1157 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1158 corners[i*2+found++] = vv; 1159 } 1160 } 1161 } 1162 if (found != 2) { 1163 PetscInt e; 1164 if (fl2g) { 1165 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1166 } else { 1167 e = idxs[0]; 1168 } 1169 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1170 } 1171 1172 /* get primal dof index on this coarse edge */ 1173 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1174 if (gc[0] > gc[1]) { 1175 PetscInt swap = corners[2*i]; 1176 corners[2*i] = corners[2*i+1]; 1177 corners[2*i+1] = swap; 1178 } 1179 cedges[i] = idxs[size-1]; 1180 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1181 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1182 } 1183 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1184 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1185 1186 #if defined(PETSC_USE_DEBUG) 1187 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1188 not interfere with neighbouring coarse edges */ 1189 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1190 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1191 for (i=0;i<nv;i++) { 1192 PetscInt emax = 0,eemax = 0; 1193 1194 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1195 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1196 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1197 for (j=1;j<nee+1;j++) { 1198 if (emax < emarks[j]) { 1199 emax = emarks[j]; 1200 eemax = j; 1201 } 1202 } 1203 /* not relevant for edges */ 1204 if (!eemax) continue; 1205 1206 for (j=ii[i];j<ii[i+1];j++) { 1207 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1208 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1209 } 1210 } 1211 } 1212 ierr = PetscFree(emarks);CHKERRQ(ierr); 1213 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1214 #endif 1215 1216 /* Compute extended rows indices for edge blocks of the change of basis */ 1217 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1218 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1219 extmem *= maxsize; 1220 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1221 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1222 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1223 for (i=0;i<nv;i++) { 1224 PetscInt mark = 0,size,start; 1225 1226 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1227 for (j=ii[i];j<ii[i+1];j++) 1228 if (marks[jj[j]] && !mark) 1229 mark = marks[jj[j]]; 1230 1231 /* not relevant */ 1232 if (!mark) continue; 1233 1234 /* import extended row */ 1235 mark--; 1236 start = mark*extmem+extrowcum[mark]; 1237 size = ii[i+1]-ii[i]; 1238 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1239 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1240 extrowcum[mark] += size; 1241 } 1242 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1243 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1244 ierr = PetscFree(marks);CHKERRQ(ierr); 1245 1246 /* Compress extrows */ 1247 cum = 0; 1248 for (i=0;i<nee;i++) { 1249 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1250 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1251 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1252 cum = PetscMax(cum,size); 1253 } 1254 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1256 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1257 1258 /* Workspace for lapack inner calls and VecSetValues */ 1259 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1260 1261 /* Create change of basis matrix (preallocation can be improved) */ 1262 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1263 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1264 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1265 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1266 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1267 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1268 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1271 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1272 1273 /* Defaults to identity */ 1274 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1275 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1276 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1277 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1278 1279 /* Create discrete gradient for the coarser level if needed */ 1280 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1281 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1282 if (pcbddc->current_level < pcbddc->max_levels) { 1283 ISLocalToGlobalMapping cel2g,cvl2g; 1284 IS wis,gwis; 1285 PetscInt cnv,cne; 1286 1287 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1288 if (fl2g) { 1289 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1290 } else { 1291 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1292 pcbddc->nedclocal = wis; 1293 } 1294 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1295 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1296 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1297 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1298 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1299 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1300 1301 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1302 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1303 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1304 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1305 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1306 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1307 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1308 1309 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1310 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1311 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1312 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1313 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1314 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1316 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1317 } 1318 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1319 1320 #if defined(PRINT_GDET) 1321 inc = 0; 1322 lev = pcbddc->current_level; 1323 #endif 1324 1325 /* Insert values in the change of basis matrix */ 1326 for (i=0;i<nee;i++) { 1327 Mat Gins = NULL, GKins = NULL; 1328 IS cornersis = NULL; 1329 PetscScalar cvals[2]; 1330 1331 if (pcbddc->nedcG) { 1332 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1333 } 1334 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1335 if (Gins && GKins) { 1336 PetscScalar *data; 1337 const PetscInt *rows,*cols; 1338 PetscInt nrh,nch,nrc,ncc; 1339 1340 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1341 /* H1 */ 1342 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1343 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1344 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1345 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1346 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1347 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1348 /* complement */ 1349 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1350 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1351 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1352 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1353 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1354 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1355 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1356 1357 /* coarse discrete gradient */ 1358 if (pcbddc->nedcG) { 1359 PetscInt cols[2]; 1360 1361 cols[0] = 2*i; 1362 cols[1] = 2*i+1; 1363 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1364 } 1365 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1366 } 1367 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1369 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1370 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1371 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1372 } 1373 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1374 1375 /* Start assembling */ 1376 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1377 if (pcbddc->nedcG) { 1378 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1379 } 1380 1381 /* Free */ 1382 if (fl2g) { 1383 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1384 for (i=0;i<nee;i++) { 1385 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1386 } 1387 ierr = PetscFree(eedges);CHKERRQ(ierr); 1388 } 1389 1390 /* hack mat_graph with primal dofs on the coarse edges */ 1391 { 1392 PCBDDCGraph graph = pcbddc->mat_graph; 1393 PetscInt *oqueue = graph->queue; 1394 PetscInt *ocptr = graph->cptr; 1395 PetscInt ncc,*idxs; 1396 1397 /* find first primal edge */ 1398 if (pcbddc->nedclocal) { 1399 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1400 } else { 1401 if (fl2g) { 1402 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1403 } 1404 idxs = cedges; 1405 } 1406 cum = 0; 1407 while (cum < nee && cedges[cum] < 0) cum++; 1408 1409 /* adapt connected components */ 1410 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1411 graph->cptr[0] = 0; 1412 for (i=0,ncc=0;i<graph->ncc;i++) { 1413 PetscInt lc = ocptr[i+1]-ocptr[i]; 1414 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1415 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1416 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1417 ncc++; 1418 lc--; 1419 cum++; 1420 while (cum < nee && cedges[cum] < 0) cum++; 1421 } 1422 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1423 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1424 ncc++; 1425 } 1426 graph->ncc = ncc; 1427 if (pcbddc->nedclocal) { 1428 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1429 } 1430 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1431 } 1432 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1434 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1435 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1436 1437 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1438 ierr = PetscFree(extrow);CHKERRQ(ierr); 1439 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1440 ierr = PetscFree(corners);CHKERRQ(ierr); 1441 ierr = PetscFree(cedges);CHKERRQ(ierr); 1442 ierr = PetscFree(extrows);CHKERRQ(ierr); 1443 ierr = PetscFree(extcols);CHKERRQ(ierr); 1444 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1445 1446 /* Complete assembling */ 1447 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1448 if (pcbddc->nedcG) { 1449 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1450 #if 0 1451 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1452 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1453 #endif 1454 } 1455 1456 /* set change of basis */ 1457 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1458 ierr = MatDestroy(&T);CHKERRQ(ierr); 1459 1460 PetscFunctionReturn(0); 1461 } 1462 1463 /* the near-null space of BDDC carries information on quadrature weights, 1464 and these can be collinear -> so cheat with MatNullSpaceCreate 1465 and create a suitable set of basis vectors first */ 1466 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1467 { 1468 PetscErrorCode ierr; 1469 PetscInt i; 1470 1471 PetscFunctionBegin; 1472 for (i=0;i<nvecs;i++) { 1473 PetscInt first,last; 1474 1475 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1476 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1477 if (i>=first && i < last) { 1478 PetscScalar *data; 1479 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1480 if (!has_const) { 1481 data[i-first] = 1.; 1482 } else { 1483 data[2*i-first] = 1./PetscSqrtReal(2.); 1484 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1485 } 1486 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1487 } 1488 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1489 } 1490 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1491 for (i=0;i<nvecs;i++) { /* reset vectors */ 1492 PetscInt first,last; 1493 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1494 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1495 if (i>=first && i < last) { 1496 PetscScalar *data; 1497 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1498 if (!has_const) { 1499 data[i-first] = 0.; 1500 } else { 1501 data[2*i-first] = 0.; 1502 data[2*i-first+1] = 0.; 1503 } 1504 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1505 } 1506 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1507 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1508 } 1509 PetscFunctionReturn(0); 1510 } 1511 1512 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1513 { 1514 Mat loc_divudotp; 1515 Vec p,v,vins,quad_vec,*quad_vecs; 1516 ISLocalToGlobalMapping map; 1517 IS *faces,*edges; 1518 PetscScalar *vals; 1519 const PetscScalar *array; 1520 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1521 PetscMPIInt rank; 1522 PetscErrorCode ierr; 1523 1524 PetscFunctionBegin; 1525 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1526 if (graph->twodim) { 1527 lmaxneighs = 2; 1528 } else { 1529 lmaxneighs = 1; 1530 for (i=0;i<ne;i++) { 1531 const PetscInt *idxs; 1532 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1533 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1534 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1535 } 1536 lmaxneighs++; /* graph count does not include self */ 1537 } 1538 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1539 maxsize = 0; 1540 for (i=0;i<ne;i++) { 1541 PetscInt nn; 1542 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1543 maxsize = PetscMax(maxsize,nn); 1544 } 1545 for (i=0;i<nf;i++) { 1546 PetscInt nn; 1547 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1548 maxsize = PetscMax(maxsize,nn); 1549 } 1550 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1551 /* create vectors to hold quadrature weights */ 1552 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1553 if (!transpose) { 1554 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1555 } else { 1556 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1557 } 1558 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1559 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1560 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1561 for (i=0;i<maxneighs;i++) { 1562 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1563 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1564 } 1565 1566 /* compute local quad vec */ 1567 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1568 if (!transpose) { 1569 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1570 } else { 1571 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1572 } 1573 ierr = VecSet(p,1.);CHKERRQ(ierr); 1574 if (!transpose) { 1575 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1576 } else { 1577 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1578 } 1579 if (vl2l) { 1580 Mat lA; 1581 VecScatter sc; 1582 1583 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1584 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1585 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1586 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1588 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1589 } else { 1590 vins = v; 1591 } 1592 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1593 ierr = VecDestroy(&p);CHKERRQ(ierr); 1594 1595 /* insert in global quadrature vecs */ 1596 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1597 for (i=0;i<nf;i++) { 1598 const PetscInt *idxs; 1599 PetscInt idx,nn,j; 1600 1601 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1602 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1603 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1604 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1605 idx = -(idx+1); 1606 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1607 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1608 } 1609 for (i=0;i<ne;i++) { 1610 const PetscInt *idxs; 1611 PetscInt idx,nn,j; 1612 1613 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1614 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1615 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1616 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1617 idx = -(idx+1); 1618 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1619 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1620 } 1621 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1622 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1623 if (vl2l) { 1624 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1625 } 1626 ierr = VecDestroy(&v);CHKERRQ(ierr); 1627 ierr = PetscFree(vals);CHKERRQ(ierr); 1628 1629 /* assemble near null space */ 1630 for (i=0;i<maxneighs;i++) { 1631 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1632 } 1633 for (i=0;i<maxneighs;i++) { 1634 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1635 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1636 } 1637 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1638 PetscFunctionReturn(0); 1639 } 1640 1641 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1642 { 1643 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1644 PetscErrorCode ierr; 1645 1646 PetscFunctionBegin; 1647 if (primalv) { 1648 if (pcbddc->user_primal_vertices_local) { 1649 IS list[2], newp; 1650 1651 list[0] = primalv; 1652 list[1] = pcbddc->user_primal_vertices_local; 1653 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1654 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1655 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1656 pcbddc->user_primal_vertices_local = newp; 1657 } else { 1658 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1659 } 1660 } 1661 PetscFunctionReturn(0); 1662 } 1663 1664 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1665 { 1666 PetscErrorCode ierr; 1667 Vec local,global; 1668 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1669 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1670 PetscBool monolithic = PETSC_FALSE; 1671 1672 PetscFunctionBegin; 1673 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1674 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1675 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1676 /* need to convert from global to local topology information and remove references to information in global ordering */ 1677 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1678 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1679 if (monolithic) goto boundary; 1680 1681 if (pcbddc->user_provided_isfordofs) { 1682 if (pcbddc->n_ISForDofs) { 1683 PetscInt i; 1684 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1685 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1686 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1687 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1688 } 1689 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1690 pcbddc->n_ISForDofs = 0; 1691 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1692 } 1693 } else { 1694 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1695 DM dm; 1696 1697 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1698 if (!dm) { 1699 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1700 } 1701 if (dm) { 1702 IS *fields; 1703 PetscInt nf,i; 1704 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1705 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1706 for (i=0;i<nf;i++) { 1707 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1708 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1709 } 1710 ierr = PetscFree(fields);CHKERRQ(ierr); 1711 pcbddc->n_ISForDofsLocal = nf; 1712 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1713 PetscContainer c; 1714 1715 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1716 if (c) { 1717 MatISLocalFields lf; 1718 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1719 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1720 } else { /* fallback, create the default fields if bs > 1 */ 1721 PetscInt i, n = matis->A->rmap->n; 1722 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1723 if (i > 1) { 1724 pcbddc->n_ISForDofsLocal = i; 1725 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1726 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1727 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1728 } 1729 } 1730 } 1731 } 1732 } else { 1733 PetscInt i; 1734 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1735 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1736 } 1737 } 1738 } 1739 1740 boundary: 1741 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1742 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1743 } else if (pcbddc->DirichletBoundariesLocal) { 1744 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1745 } 1746 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1747 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1748 } else if (pcbddc->NeumannBoundariesLocal) { 1749 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1750 } 1751 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1752 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1753 } 1754 ierr = VecDestroy(&global);CHKERRQ(ierr); 1755 ierr = VecDestroy(&local);CHKERRQ(ierr); 1756 /* detect local disconnected subdomains if requested (use matis->A) */ 1757 if (pcbddc->detect_disconnected) { 1758 IS primalv = NULL; 1759 PetscInt i; 1760 1761 for (i=0;i<pcbddc->n_local_subs;i++) { 1762 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1763 } 1764 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1765 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1766 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1767 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1768 } 1769 /* early stage corner detection */ 1770 { 1771 DM dm; 1772 1773 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1774 if (dm) { 1775 PetscBool isda; 1776 1777 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1778 if (isda) { 1779 ISLocalToGlobalMapping l2l; 1780 IS corners; 1781 Mat lA; 1782 1783 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1784 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1785 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1786 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1787 if (l2l) { 1788 const PetscInt *idx; 1789 PetscInt bs,*idxout,n; 1790 1791 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1792 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1793 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1794 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1795 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1796 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1797 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1798 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1799 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1800 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1801 } else { /* not from DMDA */ 1802 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1803 } 1804 } 1805 } 1806 } 1807 PetscFunctionReturn(0); 1808 } 1809 1810 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1811 { 1812 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1813 PetscErrorCode ierr; 1814 IS nis; 1815 const PetscInt *idxs; 1816 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1817 PetscBool *ld; 1818 1819 PetscFunctionBegin; 1820 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1821 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1822 if (mop == MPI_LAND) { 1823 /* init rootdata with true */ 1824 ld = (PetscBool*) matis->sf_rootdata; 1825 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1826 } else { 1827 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1828 } 1829 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1830 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1831 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1832 ld = (PetscBool*) matis->sf_leafdata; 1833 for (i=0;i<nd;i++) 1834 if (-1 < idxs[i] && idxs[i] < n) 1835 ld[idxs[i]] = PETSC_TRUE; 1836 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1837 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1838 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1839 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1840 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1841 if (mop == MPI_LAND) { 1842 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1843 } else { 1844 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1845 } 1846 for (i=0,nnd=0;i<n;i++) 1847 if (ld[i]) 1848 nidxs[nnd++] = i; 1849 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1850 ierr = ISDestroy(is);CHKERRQ(ierr); 1851 *is = nis; 1852 PetscFunctionReturn(0); 1853 } 1854 1855 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1856 { 1857 PC_IS *pcis = (PC_IS*)(pc->data); 1858 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1859 PetscErrorCode ierr; 1860 1861 PetscFunctionBegin; 1862 if (!pcbddc->benign_have_null) { 1863 PetscFunctionReturn(0); 1864 } 1865 if (pcbddc->ChangeOfBasisMatrix) { 1866 Vec swap; 1867 1868 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1869 swap = pcbddc->work_change; 1870 pcbddc->work_change = r; 1871 r = swap; 1872 } 1873 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1875 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1876 ierr = VecSet(z,0.);CHKERRQ(ierr); 1877 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1878 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1879 if (pcbddc->ChangeOfBasisMatrix) { 1880 pcbddc->work_change = r; 1881 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1882 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1883 } 1884 PetscFunctionReturn(0); 1885 } 1886 1887 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1888 { 1889 PCBDDCBenignMatMult_ctx ctx; 1890 PetscErrorCode ierr; 1891 PetscBool apply_right,apply_left,reset_x; 1892 1893 PetscFunctionBegin; 1894 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1895 if (transpose) { 1896 apply_right = ctx->apply_left; 1897 apply_left = ctx->apply_right; 1898 } else { 1899 apply_right = ctx->apply_right; 1900 apply_left = ctx->apply_left; 1901 } 1902 reset_x = PETSC_FALSE; 1903 if (apply_right) { 1904 const PetscScalar *ax; 1905 PetscInt nl,i; 1906 1907 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1908 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1909 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1910 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1911 for (i=0;i<ctx->benign_n;i++) { 1912 PetscScalar sum,val; 1913 const PetscInt *idxs; 1914 PetscInt nz,j; 1915 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1916 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1917 sum = 0.; 1918 if (ctx->apply_p0) { 1919 val = ctx->work[idxs[nz-1]]; 1920 for (j=0;j<nz-1;j++) { 1921 sum += ctx->work[idxs[j]]; 1922 ctx->work[idxs[j]] += val; 1923 } 1924 } else { 1925 for (j=0;j<nz-1;j++) { 1926 sum += ctx->work[idxs[j]]; 1927 } 1928 } 1929 ctx->work[idxs[nz-1]] -= sum; 1930 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1931 } 1932 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1933 reset_x = PETSC_TRUE; 1934 } 1935 if (transpose) { 1936 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1937 } else { 1938 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1939 } 1940 if (reset_x) { 1941 ierr = VecResetArray(x);CHKERRQ(ierr); 1942 } 1943 if (apply_left) { 1944 PetscScalar *ay; 1945 PetscInt i; 1946 1947 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1948 for (i=0;i<ctx->benign_n;i++) { 1949 PetscScalar sum,val; 1950 const PetscInt *idxs; 1951 PetscInt nz,j; 1952 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1953 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1954 val = -ay[idxs[nz-1]]; 1955 if (ctx->apply_p0) { 1956 sum = 0.; 1957 for (j=0;j<nz-1;j++) { 1958 sum += ay[idxs[j]]; 1959 ay[idxs[j]] += val; 1960 } 1961 ay[idxs[nz-1]] += sum; 1962 } else { 1963 for (j=0;j<nz-1;j++) { 1964 ay[idxs[j]] += val; 1965 } 1966 ay[idxs[nz-1]] = 0.; 1967 } 1968 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1969 } 1970 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1971 } 1972 PetscFunctionReturn(0); 1973 } 1974 1975 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1976 { 1977 PetscErrorCode ierr; 1978 1979 PetscFunctionBegin; 1980 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1981 PetscFunctionReturn(0); 1982 } 1983 1984 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1985 { 1986 PetscErrorCode ierr; 1987 1988 PetscFunctionBegin; 1989 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1990 PetscFunctionReturn(0); 1991 } 1992 1993 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1994 { 1995 PC_IS *pcis = (PC_IS*)pc->data; 1996 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1997 PCBDDCBenignMatMult_ctx ctx; 1998 PetscErrorCode ierr; 1999 2000 PetscFunctionBegin; 2001 if (!restore) { 2002 Mat A_IB,A_BI; 2003 PetscScalar *work; 2004 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2005 2006 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2007 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2008 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2009 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2010 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2011 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2012 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2013 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2014 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2015 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2016 ctx->apply_left = PETSC_TRUE; 2017 ctx->apply_right = PETSC_FALSE; 2018 ctx->apply_p0 = PETSC_FALSE; 2019 ctx->benign_n = pcbddc->benign_n; 2020 if (reuse) { 2021 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2022 ctx->free = PETSC_FALSE; 2023 } else { /* TODO: could be optimized for successive solves */ 2024 ISLocalToGlobalMapping N_to_D; 2025 PetscInt i; 2026 2027 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2028 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2029 for (i=0;i<pcbddc->benign_n;i++) { 2030 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2031 } 2032 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2033 ctx->free = PETSC_TRUE; 2034 } 2035 ctx->A = pcis->A_IB; 2036 ctx->work = work; 2037 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2038 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2040 pcis->A_IB = A_IB; 2041 2042 /* A_BI as A_IB^T */ 2043 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2044 pcbddc->benign_original_mat = pcis->A_BI; 2045 pcis->A_BI = A_BI; 2046 } else { 2047 if (!pcbddc->benign_original_mat) { 2048 PetscFunctionReturn(0); 2049 } 2050 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2051 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2052 pcis->A_IB = ctx->A; 2053 ctx->A = NULL; 2054 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2055 pcis->A_BI = pcbddc->benign_original_mat; 2056 pcbddc->benign_original_mat = NULL; 2057 if (ctx->free) { 2058 PetscInt i; 2059 for (i=0;i<ctx->benign_n;i++) { 2060 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2061 } 2062 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2063 } 2064 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2065 ierr = PetscFree(ctx);CHKERRQ(ierr); 2066 } 2067 PetscFunctionReturn(0); 2068 } 2069 2070 /* used just in bddc debug mode */ 2071 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2072 { 2073 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2074 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2075 Mat An; 2076 PetscErrorCode ierr; 2077 2078 PetscFunctionBegin; 2079 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2080 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2081 if (is1) { 2082 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2083 ierr = MatDestroy(&An);CHKERRQ(ierr); 2084 } else { 2085 *B = An; 2086 } 2087 PetscFunctionReturn(0); 2088 } 2089 2090 /* TODO: add reuse flag */ 2091 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2092 { 2093 Mat Bt; 2094 PetscScalar *a,*bdata; 2095 const PetscInt *ii,*ij; 2096 PetscInt m,n,i,nnz,*bii,*bij; 2097 PetscBool flg_row; 2098 PetscErrorCode ierr; 2099 2100 PetscFunctionBegin; 2101 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2102 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2103 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2104 nnz = n; 2105 for (i=0;i<ii[n];i++) { 2106 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2107 } 2108 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2109 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2110 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2111 nnz = 0; 2112 bii[0] = 0; 2113 for (i=0;i<n;i++) { 2114 PetscInt j; 2115 for (j=ii[i];j<ii[i+1];j++) { 2116 PetscScalar entry = a[j]; 2117 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2118 bij[nnz] = ij[j]; 2119 bdata[nnz] = entry; 2120 nnz++; 2121 } 2122 } 2123 bii[i+1] = nnz; 2124 } 2125 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2126 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2127 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2128 { 2129 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2130 b->free_a = PETSC_TRUE; 2131 b->free_ij = PETSC_TRUE; 2132 } 2133 *B = Bt; 2134 PetscFunctionReturn(0); 2135 } 2136 2137 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2138 { 2139 Mat B = NULL; 2140 DM dm; 2141 IS is_dummy,*cc_n; 2142 ISLocalToGlobalMapping l2gmap_dummy; 2143 PCBDDCGraph graph; 2144 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2145 PetscInt i,n; 2146 PetscInt *xadj,*adjncy; 2147 PetscBool isplex = PETSC_FALSE; 2148 PetscErrorCode ierr; 2149 2150 PetscFunctionBegin; 2151 if (ncc) *ncc = 0; 2152 if (cc) *cc = NULL; 2153 if (primalv) *primalv = NULL; 2154 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2155 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2156 if (!dm) { 2157 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2158 } 2159 if (dm) { 2160 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2161 } 2162 if (isplex) { /* this code has been modified from plexpartition.c */ 2163 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2164 PetscInt *adj = NULL; 2165 IS cellNumbering; 2166 const PetscInt *cellNum; 2167 PetscBool useCone, useClosure; 2168 PetscSection section; 2169 PetscSegBuffer adjBuffer; 2170 PetscSF sfPoint; 2171 PetscErrorCode ierr; 2172 2173 PetscFunctionBegin; 2174 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2175 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2176 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2177 /* Build adjacency graph via a section/segbuffer */ 2178 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2179 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2180 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2181 /* Always use FVM adjacency to create partitioner graph */ 2182 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2183 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2184 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2185 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2186 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2187 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2188 for (n = 0, p = pStart; p < pEnd; p++) { 2189 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2190 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2191 adjSize = PETSC_DETERMINE; 2192 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2193 for (a = 0; a < adjSize; ++a) { 2194 const PetscInt point = adj[a]; 2195 if (pStart <= point && point < pEnd) { 2196 PetscInt *PETSC_RESTRICT pBuf; 2197 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2198 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2199 *pBuf = point; 2200 } 2201 } 2202 n++; 2203 } 2204 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2205 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2206 /* Derive CSR graph from section/segbuffer */ 2207 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2208 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2209 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2210 for (idx = 0, p = pStart; p < pEnd; p++) { 2211 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2212 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2213 } 2214 xadj[n] = size; 2215 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2216 /* Clean up */ 2217 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2218 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2219 ierr = PetscFree(adj);CHKERRQ(ierr); 2220 graph->xadj = xadj; 2221 graph->adjncy = adjncy; 2222 } else { 2223 Mat A; 2224 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2225 2226 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2227 if (!A->rmap->N || !A->cmap->N) { 2228 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2229 PetscFunctionReturn(0); 2230 } 2231 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2232 if (!isseqaij && filter) { 2233 PetscBool isseqdense; 2234 2235 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2236 if (!isseqdense) { 2237 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2238 } else { /* TODO: rectangular case and LDA */ 2239 PetscScalar *array; 2240 PetscReal chop=1.e-6; 2241 2242 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2243 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2244 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2245 for (i=0;i<n;i++) { 2246 PetscInt j; 2247 for (j=i+1;j<n;j++) { 2248 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2249 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2250 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2251 } 2252 } 2253 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2254 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2255 } 2256 } else { 2257 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2258 B = A; 2259 } 2260 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2261 2262 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2263 if (filter) { 2264 PetscScalar *data; 2265 PetscInt j,cum; 2266 2267 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2268 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2269 cum = 0; 2270 for (i=0;i<n;i++) { 2271 PetscInt t; 2272 2273 for (j=xadj[i];j<xadj[i+1];j++) { 2274 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2275 continue; 2276 } 2277 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2278 } 2279 t = xadj_filtered[i]; 2280 xadj_filtered[i] = cum; 2281 cum += t; 2282 } 2283 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2284 graph->xadj = xadj_filtered; 2285 graph->adjncy = adjncy_filtered; 2286 } else { 2287 graph->xadj = xadj; 2288 graph->adjncy = adjncy; 2289 } 2290 } 2291 /* compute local connected components using PCBDDCGraph */ 2292 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2293 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2294 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2295 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2296 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2297 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2298 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2299 2300 /* partial clean up */ 2301 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2302 if (B) { 2303 PetscBool flg_row; 2304 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2305 ierr = MatDestroy(&B);CHKERRQ(ierr); 2306 } 2307 if (isplex) { 2308 ierr = PetscFree(xadj);CHKERRQ(ierr); 2309 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2310 } 2311 2312 /* get back data */ 2313 if (isplex) { 2314 if (ncc) *ncc = graph->ncc; 2315 if (cc || primalv) { 2316 Mat A; 2317 PetscBT btv,btvt; 2318 PetscSection subSection; 2319 PetscInt *ids,cum,cump,*cids,*pids; 2320 2321 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2322 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2323 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2324 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2325 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2326 2327 cids[0] = 0; 2328 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2329 PetscInt j; 2330 2331 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2332 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2333 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2334 2335 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2336 for (k = 0; k < 2*size; k += 2) { 2337 PetscInt s, p = closure[k], off, dof, cdof; 2338 2339 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2340 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2341 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2342 for (s = 0; s < dof-cdof; s++) { 2343 if (PetscBTLookupSet(btvt,off+s)) continue; 2344 if (!PetscBTLookup(btv,off+s)) { 2345 ids[cum++] = off+s; 2346 } else { /* cross-vertex */ 2347 pids[cump++] = off+s; 2348 } 2349 } 2350 } 2351 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2352 } 2353 cids[i+1] = cum; 2354 /* mark dofs as already assigned */ 2355 for (j = cids[i]; j < cids[i+1]; j++) { 2356 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2357 } 2358 } 2359 if (cc) { 2360 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2361 for (i = 0; i < graph->ncc; i++) { 2362 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2363 } 2364 *cc = cc_n; 2365 } 2366 if (primalv) { 2367 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2368 } 2369 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2370 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2371 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2372 } 2373 } else { 2374 if (ncc) *ncc = graph->ncc; 2375 if (cc) { 2376 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2377 for (i=0;i<graph->ncc;i++) { 2378 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2379 } 2380 *cc = cc_n; 2381 } 2382 } 2383 /* clean up graph */ 2384 graph->xadj = 0; 2385 graph->adjncy = 0; 2386 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2387 PetscFunctionReturn(0); 2388 } 2389 2390 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2391 { 2392 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2393 PC_IS* pcis = (PC_IS*)(pc->data); 2394 IS dirIS = NULL; 2395 PetscInt i; 2396 PetscErrorCode ierr; 2397 2398 PetscFunctionBegin; 2399 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2400 if (zerodiag) { 2401 Mat A; 2402 Vec vec3_N; 2403 PetscScalar *vals; 2404 const PetscInt *idxs; 2405 PetscInt nz,*count; 2406 2407 /* p0 */ 2408 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2409 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2410 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2411 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2412 for (i=0;i<nz;i++) vals[i] = 1.; 2413 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2414 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2415 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2416 /* v_I */ 2417 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2418 for (i=0;i<nz;i++) vals[i] = 0.; 2419 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2420 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2421 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2422 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2423 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2424 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2425 if (dirIS) { 2426 PetscInt n; 2427 2428 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2429 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2430 for (i=0;i<n;i++) vals[i] = 0.; 2431 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2432 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2433 } 2434 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2435 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2436 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2437 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2438 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2439 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2440 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2441 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2442 ierr = PetscFree(vals);CHKERRQ(ierr); 2443 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2444 2445 /* there should not be any pressure dofs lying on the interface */ 2446 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2447 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2448 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2449 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2450 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2451 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2452 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2453 ierr = PetscFree(count);CHKERRQ(ierr); 2454 } 2455 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2456 2457 /* check PCBDDCBenignGetOrSetP0 */ 2458 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2459 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2460 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2461 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2462 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2463 for (i=0;i<pcbddc->benign_n;i++) { 2464 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2465 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2466 } 2467 PetscFunctionReturn(0); 2468 } 2469 2470 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2471 { 2472 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2473 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2474 PetscInt nz,n; 2475 PetscInt *interior_dofs,n_interior_dofs,nneu; 2476 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2477 PetscErrorCode ierr; 2478 2479 PetscFunctionBegin; 2480 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2481 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2482 for (n=0;n<pcbddc->benign_n;n++) { 2483 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2484 } 2485 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2486 pcbddc->benign_n = 0; 2487 2488 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2489 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2490 Checks if all the pressure dofs in each subdomain have a zero diagonal 2491 If not, a change of basis on pressures is not needed 2492 since the local Schur complements are already SPD 2493 */ 2494 has_null_pressures = PETSC_TRUE; 2495 have_null = PETSC_TRUE; 2496 if (pcbddc->n_ISForDofsLocal) { 2497 IS iP = NULL; 2498 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2499 2500 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2501 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2502 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2503 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2504 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2505 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2506 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2507 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2508 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2509 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2510 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2511 if (iP) { 2512 IS newpressures; 2513 2514 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2515 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2516 pressures = newpressures; 2517 } 2518 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2519 if (!sorted) { 2520 ierr = ISSort(pressures);CHKERRQ(ierr); 2521 } 2522 } else { 2523 pressures = NULL; 2524 } 2525 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2526 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2527 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2528 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2529 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2530 if (!sorted) { 2531 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2532 } 2533 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2534 zerodiag_save = zerodiag; 2535 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2536 if (!nz) { 2537 if (n) have_null = PETSC_FALSE; 2538 has_null_pressures = PETSC_FALSE; 2539 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2540 } 2541 recompute_zerodiag = PETSC_FALSE; 2542 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2543 zerodiag_subs = NULL; 2544 pcbddc->benign_n = 0; 2545 n_interior_dofs = 0; 2546 interior_dofs = NULL; 2547 nneu = 0; 2548 if (pcbddc->NeumannBoundariesLocal) { 2549 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2550 } 2551 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2552 if (checkb) { /* need to compute interior nodes */ 2553 PetscInt n,i,j; 2554 PetscInt n_neigh,*neigh,*n_shared,**shared; 2555 PetscInt *iwork; 2556 2557 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2558 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2559 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2560 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2561 for (i=1;i<n_neigh;i++) 2562 for (j=0;j<n_shared[i];j++) 2563 iwork[shared[i][j]] += 1; 2564 for (i=0;i<n;i++) 2565 if (!iwork[i]) 2566 interior_dofs[n_interior_dofs++] = i; 2567 ierr = PetscFree(iwork);CHKERRQ(ierr); 2568 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2569 } 2570 if (has_null_pressures) { 2571 IS *subs; 2572 PetscInt nsubs,i,j,nl; 2573 const PetscInt *idxs; 2574 PetscScalar *array; 2575 Vec *work; 2576 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2577 2578 subs = pcbddc->local_subs; 2579 nsubs = pcbddc->n_local_subs; 2580 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2581 if (checkb) { 2582 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2583 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2584 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2585 /* work[0] = 1_p */ 2586 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2587 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2588 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2589 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2590 /* work[0] = 1_v */ 2591 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2592 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2593 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2594 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2595 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2596 } 2597 if (nsubs > 1) { 2598 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2599 for (i=0;i<nsubs;i++) { 2600 ISLocalToGlobalMapping l2g; 2601 IS t_zerodiag_subs; 2602 PetscInt nl; 2603 2604 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2605 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2606 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2607 if (nl) { 2608 PetscBool valid = PETSC_TRUE; 2609 2610 if (checkb) { 2611 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2612 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2613 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2614 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2615 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2616 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2617 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2618 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2619 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2620 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2621 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2622 for (j=0;j<n_interior_dofs;j++) { 2623 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2624 valid = PETSC_FALSE; 2625 break; 2626 } 2627 } 2628 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2629 } 2630 if (valid && nneu) { 2631 const PetscInt *idxs; 2632 PetscInt nzb; 2633 2634 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2635 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2636 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2637 if (nzb) valid = PETSC_FALSE; 2638 } 2639 if (valid && pressures) { 2640 IS t_pressure_subs; 2641 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2642 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2643 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2644 } 2645 if (valid) { 2646 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2647 pcbddc->benign_n++; 2648 } else { 2649 recompute_zerodiag = PETSC_TRUE; 2650 } 2651 } 2652 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2653 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2654 } 2655 } else { /* there's just one subdomain (or zero if they have not been detected */ 2656 PetscBool valid = PETSC_TRUE; 2657 2658 if (nneu) valid = PETSC_FALSE; 2659 if (valid && pressures) { 2660 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2661 } 2662 if (valid && checkb) { 2663 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2664 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2665 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2666 for (j=0;j<n_interior_dofs;j++) { 2667 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2668 valid = PETSC_FALSE; 2669 break; 2670 } 2671 } 2672 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2673 } 2674 if (valid) { 2675 pcbddc->benign_n = 1; 2676 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2677 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2678 zerodiag_subs[0] = zerodiag; 2679 } 2680 } 2681 if (checkb) { 2682 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2683 } 2684 } 2685 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2686 2687 if (!pcbddc->benign_n) { 2688 PetscInt n; 2689 2690 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2691 recompute_zerodiag = PETSC_FALSE; 2692 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2693 if (n) { 2694 has_null_pressures = PETSC_FALSE; 2695 have_null = PETSC_FALSE; 2696 } 2697 } 2698 2699 /* final check for null pressures */ 2700 if (zerodiag && pressures) { 2701 PetscInt nz,np; 2702 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2703 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2704 if (nz != np) have_null = PETSC_FALSE; 2705 } 2706 2707 if (recompute_zerodiag) { 2708 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2709 if (pcbddc->benign_n == 1) { 2710 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2711 zerodiag = zerodiag_subs[0]; 2712 } else { 2713 PetscInt i,nzn,*new_idxs; 2714 2715 nzn = 0; 2716 for (i=0;i<pcbddc->benign_n;i++) { 2717 PetscInt ns; 2718 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2719 nzn += ns; 2720 } 2721 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2722 nzn = 0; 2723 for (i=0;i<pcbddc->benign_n;i++) { 2724 PetscInt ns,*idxs; 2725 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2726 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2727 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2728 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2729 nzn += ns; 2730 } 2731 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2732 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2733 } 2734 have_null = PETSC_FALSE; 2735 } 2736 2737 /* Prepare matrix to compute no-net-flux */ 2738 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2739 Mat A,loc_divudotp; 2740 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2741 IS row,col,isused = NULL; 2742 PetscInt M,N,n,st,n_isused; 2743 2744 if (pressures) { 2745 isused = pressures; 2746 } else { 2747 isused = zerodiag_save; 2748 } 2749 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2750 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2751 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2752 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2753 n_isused = 0; 2754 if (isused) { 2755 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2756 } 2757 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2758 st = st-n_isused; 2759 if (n) { 2760 const PetscInt *gidxs; 2761 2762 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2763 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2764 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2765 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2766 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2767 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2768 } else { 2769 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2770 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2771 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2772 } 2773 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2774 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2776 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2777 ierr = ISDestroy(&row);CHKERRQ(ierr); 2778 ierr = ISDestroy(&col);CHKERRQ(ierr); 2779 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2780 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2781 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2782 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2783 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2784 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2785 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2786 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2787 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2789 } 2790 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2791 2792 /* change of basis and p0 dofs */ 2793 if (has_null_pressures) { 2794 IS zerodiagc; 2795 const PetscInt *idxs,*idxsc; 2796 PetscInt i,s,*nnz; 2797 2798 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2799 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2800 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2801 /* local change of basis for pressures */ 2802 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2803 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2804 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2805 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2806 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2807 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2808 for (i=0;i<pcbddc->benign_n;i++) { 2809 PetscInt nzs,j; 2810 2811 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2812 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2813 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2814 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2815 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2816 } 2817 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2818 ierr = PetscFree(nnz);CHKERRQ(ierr); 2819 /* set identity on velocities */ 2820 for (i=0;i<n-nz;i++) { 2821 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2822 } 2823 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2824 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2825 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2826 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2827 /* set change on pressures */ 2828 for (s=0;s<pcbddc->benign_n;s++) { 2829 PetscScalar *array; 2830 PetscInt nzs; 2831 2832 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2833 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2834 for (i=0;i<nzs-1;i++) { 2835 PetscScalar vals[2]; 2836 PetscInt cols[2]; 2837 2838 cols[0] = idxs[i]; 2839 cols[1] = idxs[nzs-1]; 2840 vals[0] = 1.; 2841 vals[1] = 1.; 2842 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2843 } 2844 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2845 for (i=0;i<nzs-1;i++) array[i] = -1.; 2846 array[nzs-1] = 1.; 2847 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2848 /* store local idxs for p0 */ 2849 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2850 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2851 ierr = PetscFree(array);CHKERRQ(ierr); 2852 } 2853 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2855 /* project if needed */ 2856 if (pcbddc->benign_change_explicit) { 2857 Mat M; 2858 2859 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2860 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2862 ierr = MatDestroy(&M);CHKERRQ(ierr); 2863 } 2864 /* store global idxs for p0 */ 2865 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2866 } 2867 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2868 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2869 2870 /* determines if the coarse solver will be singular or not */ 2871 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2872 /* determines if the problem has subdomains with 0 pressure block */ 2873 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2874 *zerodiaglocal = zerodiag; 2875 PetscFunctionReturn(0); 2876 } 2877 2878 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2879 { 2880 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2881 PetscScalar *array; 2882 PetscErrorCode ierr; 2883 2884 PetscFunctionBegin; 2885 if (!pcbddc->benign_sf) { 2886 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2887 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2888 } 2889 if (get) { 2890 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2891 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2893 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2894 } else { 2895 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2896 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2898 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2899 } 2900 PetscFunctionReturn(0); 2901 } 2902 2903 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2904 { 2905 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2906 PetscErrorCode ierr; 2907 2908 PetscFunctionBegin; 2909 /* TODO: add error checking 2910 - avoid nested pop (or push) calls. 2911 - cannot push before pop. 2912 - cannot call this if pcbddc->local_mat is NULL 2913 */ 2914 if (!pcbddc->benign_n) { 2915 PetscFunctionReturn(0); 2916 } 2917 if (pop) { 2918 if (pcbddc->benign_change_explicit) { 2919 IS is_p0; 2920 MatReuse reuse; 2921 2922 /* extract B_0 */ 2923 reuse = MAT_INITIAL_MATRIX; 2924 if (pcbddc->benign_B0) { 2925 reuse = MAT_REUSE_MATRIX; 2926 } 2927 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2928 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2929 /* remove rows and cols from local problem */ 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2931 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2932 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2933 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2934 } else { 2935 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2936 PetscScalar *vals; 2937 PetscInt i,n,*idxs_ins; 2938 2939 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2940 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2941 if (!pcbddc->benign_B0) { 2942 PetscInt *nnz; 2943 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2944 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2945 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2946 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2947 for (i=0;i<pcbddc->benign_n;i++) { 2948 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2949 nnz[i] = n - nnz[i]; 2950 } 2951 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2952 ierr = PetscFree(nnz);CHKERRQ(ierr); 2953 } 2954 2955 for (i=0;i<pcbddc->benign_n;i++) { 2956 PetscScalar *array; 2957 PetscInt *idxs,j,nz,cum; 2958 2959 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2960 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2961 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2962 for (j=0;j<nz;j++) vals[j] = 1.; 2963 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2964 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2965 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2966 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2967 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2968 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2969 cum = 0; 2970 for (j=0;j<n;j++) { 2971 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2972 vals[cum] = array[j]; 2973 idxs_ins[cum] = j; 2974 cum++; 2975 } 2976 } 2977 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2978 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2979 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2980 } 2981 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2983 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2984 } 2985 } else { /* push */ 2986 if (pcbddc->benign_change_explicit) { 2987 PetscInt i; 2988 2989 for (i=0;i<pcbddc->benign_n;i++) { 2990 PetscScalar *B0_vals; 2991 PetscInt *B0_cols,B0_ncol; 2992 2993 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2994 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2997 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2998 } 2999 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3001 } else { 3002 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3003 } 3004 } 3005 PetscFunctionReturn(0); 3006 } 3007 3008 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3009 { 3010 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3011 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3012 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3013 PetscBLASInt *B_iwork,*B_ifail; 3014 PetscScalar *work,lwork; 3015 PetscScalar *St,*S,*eigv; 3016 PetscScalar *Sarray,*Starray; 3017 PetscReal *eigs,thresh; 3018 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3019 PetscBool allocated_S_St; 3020 #if defined(PETSC_USE_COMPLEX) 3021 PetscReal *rwork; 3022 #endif 3023 PetscErrorCode ierr; 3024 3025 PetscFunctionBegin; 3026 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3027 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3028 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3029 3030 if (pcbddc->dbg_flag) { 3031 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3034 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3035 } 3036 3037 if (pcbddc->dbg_flag) { 3038 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3039 } 3040 3041 /* max size of subsets */ 3042 mss = 0; 3043 for (i=0;i<sub_schurs->n_subs;i++) { 3044 PetscInt subset_size; 3045 3046 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3047 mss = PetscMax(mss,subset_size); 3048 } 3049 3050 /* min/max and threshold */ 3051 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3052 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3053 nmax = PetscMax(nmin,nmax); 3054 allocated_S_St = PETSC_FALSE; 3055 if (nmin) { 3056 allocated_S_St = PETSC_TRUE; 3057 } 3058 3059 /* allocate lapack workspace */ 3060 cum = cum2 = 0; 3061 maxneigs = 0; 3062 for (i=0;i<sub_schurs->n_subs;i++) { 3063 PetscInt n,subset_size; 3064 3065 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3066 n = PetscMin(subset_size,nmax); 3067 cum += subset_size; 3068 cum2 += subset_size*n; 3069 maxneigs = PetscMax(maxneigs,n); 3070 } 3071 if (mss) { 3072 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3073 PetscBLASInt B_itype = 1; 3074 PetscBLASInt B_N = mss; 3075 PetscReal zero = 0.0; 3076 PetscReal eps = 0.0; /* dlamch? */ 3077 3078 B_lwork = -1; 3079 S = NULL; 3080 St = NULL; 3081 eigs = NULL; 3082 eigv = NULL; 3083 B_iwork = NULL; 3084 B_ifail = NULL; 3085 #if defined(PETSC_USE_COMPLEX) 3086 rwork = NULL; 3087 #endif 3088 thresh = 1.0; 3089 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3090 #if defined(PETSC_USE_COMPLEX) 3091 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3092 #else 3093 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3094 #endif 3095 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3096 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3097 } else { 3098 /* TODO */ 3099 } 3100 } else { 3101 lwork = 0; 3102 } 3103 3104 nv = 0; 3105 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3106 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3107 } 3108 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3109 if (allocated_S_St) { 3110 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3111 } 3112 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3113 #if defined(PETSC_USE_COMPLEX) 3114 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3115 #endif 3116 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3117 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3118 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3119 nv+cum,&pcbddc->adaptive_constraints_idxs, 3120 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3121 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3122 3123 maxneigs = 0; 3124 cum = cumarray = 0; 3125 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3126 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3127 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3128 const PetscInt *idxs; 3129 3130 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3131 for (cum=0;cum<nv;cum++) { 3132 pcbddc->adaptive_constraints_n[cum] = 1; 3133 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3134 pcbddc->adaptive_constraints_data[cum] = 1.0; 3135 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3136 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3137 } 3138 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3139 } 3140 3141 if (mss) { /* multilevel */ 3142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3143 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3144 } 3145 3146 thresh = pcbddc->adaptive_threshold; 3147 for (i=0;i<sub_schurs->n_subs;i++) { 3148 const PetscInt *idxs; 3149 PetscReal upper,lower; 3150 PetscInt j,subset_size,eigs_start = 0; 3151 PetscBLASInt B_N; 3152 PetscBool same_data = PETSC_FALSE; 3153 3154 if (pcbddc->use_deluxe_scaling) { 3155 upper = PETSC_MAX_REAL; 3156 lower = thresh; 3157 } else { 3158 upper = 1./thresh; 3159 lower = 0.; 3160 } 3161 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3162 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3163 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3164 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3165 if (sub_schurs->is_hermitian) { 3166 PetscInt j,k; 3167 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3168 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3169 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3170 } 3171 for (j=0;j<subset_size;j++) { 3172 for (k=j;k<subset_size;k++) { 3173 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3174 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3175 } 3176 } 3177 } else { 3178 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3179 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3180 } 3181 } else { 3182 S = Sarray + cumarray; 3183 St = Starray + cumarray; 3184 } 3185 /* see if we can save some work */ 3186 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3187 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3188 } 3189 3190 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3191 B_neigs = 0; 3192 } else { 3193 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3194 PetscBLASInt B_itype = 1; 3195 PetscBLASInt B_IL, B_IU; 3196 PetscReal eps = -1.0; /* dlamch? */ 3197 PetscInt nmin_s; 3198 PetscBool compute_range = PETSC_FALSE; 3199 3200 if (pcbddc->dbg_flag) { 3201 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3202 } 3203 3204 compute_range = PETSC_FALSE; 3205 if (thresh > 1.+PETSC_SMALL && !same_data) { 3206 compute_range = PETSC_TRUE; 3207 } 3208 3209 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3210 if (compute_range) { 3211 3212 /* ask for eigenvalues larger than thresh */ 3213 #if defined(PETSC_USE_COMPLEX) 3214 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3215 #else 3216 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3217 #endif 3218 } else if (!same_data) { 3219 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3220 B_IL = 1; 3221 #if defined(PETSC_USE_COMPLEX) 3222 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3223 #else 3224 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3225 #endif 3226 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3227 PetscInt k; 3228 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3229 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3230 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3231 nmin = nmax; 3232 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3233 for (k=0;k<nmax;k++) { 3234 eigs[k] = 1./PETSC_SMALL; 3235 eigv[k*(subset_size+1)] = 1.0; 3236 } 3237 } 3238 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3239 if (B_ierr) { 3240 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3241 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3242 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3243 } 3244 3245 if (B_neigs > nmax) { 3246 if (pcbddc->dbg_flag) { 3247 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3248 } 3249 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3250 B_neigs = nmax; 3251 } 3252 3253 nmin_s = PetscMin(nmin,B_N); 3254 if (B_neigs < nmin_s) { 3255 PetscBLASInt B_neigs2; 3256 3257 if (pcbddc->use_deluxe_scaling) { 3258 B_IL = B_N - nmin_s + 1; 3259 B_IU = B_N - B_neigs; 3260 } else { 3261 B_IL = B_neigs + 1; 3262 B_IU = nmin_s; 3263 } 3264 if (pcbddc->dbg_flag) { 3265 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3266 } 3267 if (sub_schurs->is_hermitian) { 3268 PetscInt j,k; 3269 for (j=0;j<subset_size;j++) { 3270 for (k=j;k<subset_size;k++) { 3271 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3272 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3273 } 3274 } 3275 } else { 3276 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3277 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3278 } 3279 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3280 #if defined(PETSC_USE_COMPLEX) 3281 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3282 #else 3283 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3284 #endif 3285 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3286 B_neigs += B_neigs2; 3287 } 3288 if (B_ierr) { 3289 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3290 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3291 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3292 } 3293 if (pcbddc->dbg_flag) { 3294 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3295 for (j=0;j<B_neigs;j++) { 3296 if (eigs[j] == 0.0) { 3297 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3298 } else { 3299 if (pcbddc->use_deluxe_scaling) { 3300 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3301 } else { 3302 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3303 } 3304 } 3305 } 3306 } 3307 } else { 3308 /* TODO */ 3309 } 3310 } 3311 /* change the basis back to the original one */ 3312 if (sub_schurs->change) { 3313 Mat change,phi,phit; 3314 3315 if (pcbddc->dbg_flag > 2) { 3316 PetscInt ii; 3317 for (ii=0;ii<B_neigs;ii++) { 3318 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3319 for (j=0;j<B_N;j++) { 3320 #if defined(PETSC_USE_COMPLEX) 3321 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3322 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3323 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3324 #else 3325 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3326 #endif 3327 } 3328 } 3329 } 3330 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3331 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3332 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3333 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3334 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3335 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3336 } 3337 maxneigs = PetscMax(B_neigs,maxneigs); 3338 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3339 if (B_neigs) { 3340 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3341 3342 if (pcbddc->dbg_flag > 1) { 3343 PetscInt ii; 3344 for (ii=0;ii<B_neigs;ii++) { 3345 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3346 for (j=0;j<B_N;j++) { 3347 #if defined(PETSC_USE_COMPLEX) 3348 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3349 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3350 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3351 #else 3352 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3353 #endif 3354 } 3355 } 3356 } 3357 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3358 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3359 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3360 cum++; 3361 } 3362 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3363 /* shift for next computation */ 3364 cumarray += subset_size*subset_size; 3365 } 3366 if (pcbddc->dbg_flag) { 3367 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3368 } 3369 3370 if (mss) { 3371 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3372 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3373 /* destroy matrices (junk) */ 3374 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3375 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3376 } 3377 if (allocated_S_St) { 3378 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3379 } 3380 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3381 #if defined(PETSC_USE_COMPLEX) 3382 ierr = PetscFree(rwork);CHKERRQ(ierr); 3383 #endif 3384 if (pcbddc->dbg_flag) { 3385 PetscInt maxneigs_r; 3386 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3387 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3388 } 3389 PetscFunctionReturn(0); 3390 } 3391 3392 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3393 { 3394 PetscScalar *coarse_submat_vals; 3395 PetscErrorCode ierr; 3396 3397 PetscFunctionBegin; 3398 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3399 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3400 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3401 3402 /* Setup local neumann solver ksp_R */ 3403 /* PCBDDCSetUpLocalScatters should be called first! */ 3404 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3405 3406 /* 3407 Setup local correction and local part of coarse basis. 3408 Gives back the dense local part of the coarse matrix in column major ordering 3409 */ 3410 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3411 3412 /* Compute total number of coarse nodes and setup coarse solver */ 3413 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3414 3415 /* free */ 3416 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3417 PetscFunctionReturn(0); 3418 } 3419 3420 PetscErrorCode PCBDDCResetCustomization(PC pc) 3421 { 3422 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3423 PetscErrorCode ierr; 3424 3425 PetscFunctionBegin; 3426 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3427 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3428 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3429 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3430 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3431 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3432 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3433 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3434 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3435 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3436 PetscFunctionReturn(0); 3437 } 3438 3439 PetscErrorCode PCBDDCResetTopography(PC pc) 3440 { 3441 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3442 PetscInt i; 3443 PetscErrorCode ierr; 3444 3445 PetscFunctionBegin; 3446 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3447 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3448 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3450 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3451 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3452 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3454 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3455 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3456 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3457 for (i=0;i<pcbddc->n_local_subs;i++) { 3458 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3459 } 3460 pcbddc->n_local_subs = 0; 3461 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3462 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3463 pcbddc->graphanalyzed = PETSC_FALSE; 3464 pcbddc->recompute_topography = PETSC_TRUE; 3465 PetscFunctionReturn(0); 3466 } 3467 3468 PetscErrorCode PCBDDCResetSolvers(PC pc) 3469 { 3470 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3471 PetscErrorCode ierr; 3472 3473 PetscFunctionBegin; 3474 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3475 if (pcbddc->coarse_phi_B) { 3476 PetscScalar *array; 3477 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3478 ierr = PetscFree(array);CHKERRQ(ierr); 3479 } 3480 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3481 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3482 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3483 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3484 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3485 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3487 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3488 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3489 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3490 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3491 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3492 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3493 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3494 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3495 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3496 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3497 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3498 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3499 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3500 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3501 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3502 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3503 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3504 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3505 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3506 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3507 if (pcbddc->benign_zerodiag_subs) { 3508 PetscInt i; 3509 for (i=0;i<pcbddc->benign_n;i++) { 3510 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3511 } 3512 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3513 } 3514 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3515 PetscFunctionReturn(0); 3516 } 3517 3518 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3519 { 3520 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3521 PC_IS *pcis = (PC_IS*)pc->data; 3522 VecType impVecType; 3523 PetscInt n_constraints,n_R,old_size; 3524 PetscErrorCode ierr; 3525 3526 PetscFunctionBegin; 3527 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3528 n_R = pcis->n - pcbddc->n_vertices; 3529 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3530 /* local work vectors (try to avoid unneeded work)*/ 3531 /* R nodes */ 3532 old_size = -1; 3533 if (pcbddc->vec1_R) { 3534 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3535 } 3536 if (n_R != old_size) { 3537 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3538 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3539 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3540 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3541 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3542 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3543 } 3544 /* local primal dofs */ 3545 old_size = -1; 3546 if (pcbddc->vec1_P) { 3547 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3548 } 3549 if (pcbddc->local_primal_size != old_size) { 3550 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3551 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3552 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3553 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3554 } 3555 /* local explicit constraints */ 3556 old_size = -1; 3557 if (pcbddc->vec1_C) { 3558 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3559 } 3560 if (n_constraints && n_constraints != old_size) { 3561 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3562 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3563 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3564 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3565 } 3566 PetscFunctionReturn(0); 3567 } 3568 3569 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3570 { 3571 PetscErrorCode ierr; 3572 /* pointers to pcis and pcbddc */ 3573 PC_IS* pcis = (PC_IS*)pc->data; 3574 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3575 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3576 /* submatrices of local problem */ 3577 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3578 /* submatrices of local coarse problem */ 3579 Mat S_VV,S_CV,S_VC,S_CC; 3580 /* working matrices */ 3581 Mat C_CR; 3582 /* additional working stuff */ 3583 PC pc_R; 3584 Mat F,Brhs = NULL; 3585 Vec dummy_vec; 3586 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3587 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3588 PetscScalar *work; 3589 PetscInt *idx_V_B; 3590 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3591 PetscInt i,n_R,n_D,n_B; 3592 3593 /* some shortcuts to scalars */ 3594 PetscScalar one=1.0,m_one=-1.0; 3595 3596 PetscFunctionBegin; 3597 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3598 3599 /* Set Non-overlapping dimensions */ 3600 n_vertices = pcbddc->n_vertices; 3601 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3602 n_B = pcis->n_B; 3603 n_D = pcis->n - n_B; 3604 n_R = pcis->n - n_vertices; 3605 3606 /* vertices in boundary numbering */ 3607 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3608 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3609 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3610 3611 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3612 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3613 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3614 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3615 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3616 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3617 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3618 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3619 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3620 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3621 3622 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3623 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3624 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3625 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3626 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3627 lda_rhs = n_R; 3628 need_benign_correction = PETSC_FALSE; 3629 if (isLU || isILU || isCHOL) { 3630 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3631 } else if (sub_schurs && sub_schurs->reuse_solver) { 3632 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3633 MatFactorType type; 3634 3635 F = reuse_solver->F; 3636 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3637 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3638 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3639 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3640 } else { 3641 F = NULL; 3642 } 3643 3644 /* determine if we can use a sparse right-hand side */ 3645 sparserhs = PETSC_FALSE; 3646 if (F) { 3647 MatSolverType solver; 3648 3649 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3650 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3651 } 3652 3653 /* allocate workspace */ 3654 n = 0; 3655 if (n_constraints) { 3656 n += lda_rhs*n_constraints; 3657 } 3658 if (n_vertices) { 3659 n = PetscMax(2*lda_rhs*n_vertices,n); 3660 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3661 } 3662 if (!pcbddc->symmetric_primal) { 3663 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3664 } 3665 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3666 3667 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3668 dummy_vec = NULL; 3669 if (need_benign_correction && lda_rhs != n_R && F) { 3670 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3671 } 3672 3673 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3674 if (n_constraints) { 3675 Mat M3,C_B; 3676 IS is_aux; 3677 PetscScalar *array,*array2; 3678 3679 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3680 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3681 3682 /* Extract constraints on R nodes: C_{CR} */ 3683 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3684 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3685 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3686 3687 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3688 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3689 if (!sparserhs) { 3690 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3691 for (i=0;i<n_constraints;i++) { 3692 const PetscScalar *row_cmat_values; 3693 const PetscInt *row_cmat_indices; 3694 PetscInt size_of_constraint,j; 3695 3696 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3697 for (j=0;j<size_of_constraint;j++) { 3698 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3699 } 3700 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3701 } 3702 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3703 } else { 3704 Mat tC_CR; 3705 3706 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3707 if (lda_rhs != n_R) { 3708 PetscScalar *aa; 3709 PetscInt r,*ii,*jj; 3710 PetscBool done; 3711 3712 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3713 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3714 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3715 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3716 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3717 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3718 } else { 3719 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3720 tC_CR = C_CR; 3721 } 3722 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3723 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3724 } 3725 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3726 if (F) { 3727 if (need_benign_correction) { 3728 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3729 3730 /* rhs is already zero on interior dofs, no need to change the rhs */ 3731 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3732 } 3733 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3734 if (need_benign_correction) { 3735 PetscScalar *marr; 3736 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3737 3738 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3739 if (lda_rhs != n_R) { 3740 for (i=0;i<n_constraints;i++) { 3741 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3742 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3743 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3744 } 3745 } else { 3746 for (i=0;i<n_constraints;i++) { 3747 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3748 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3749 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3750 } 3751 } 3752 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3753 } 3754 } else { 3755 PetscScalar *marr; 3756 3757 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3758 for (i=0;i<n_constraints;i++) { 3759 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3760 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3761 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3762 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3763 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3764 } 3765 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3766 } 3767 if (sparserhs) { 3768 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3769 } 3770 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3771 if (!pcbddc->switch_static) { 3772 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3774 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3775 for (i=0;i<n_constraints;i++) { 3776 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3777 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3778 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3779 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3781 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3782 } 3783 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3784 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3785 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3786 } else { 3787 if (lda_rhs != n_R) { 3788 IS dummy; 3789 3790 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3791 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3792 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3793 } else { 3794 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3795 pcbddc->local_auxmat2 = local_auxmat2_R; 3796 } 3797 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3798 } 3799 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3800 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3801 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3802 if (isCHOL) { 3803 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3804 } else { 3805 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3806 } 3807 ierr = MatSeqDenseInvertFactors_Private(M3);CHKERRQ(ierr); 3808 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3809 ierr = MatMatMult(M3,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3810 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3811 ierr = MatCopy(M3,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3812 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3813 } 3814 3815 /* Get submatrices from subdomain matrix */ 3816 if (n_vertices) { 3817 IS is_aux; 3818 PetscBool isseqaij; 3819 3820 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3821 IS tis; 3822 3823 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3824 ierr = ISSort(tis);CHKERRQ(ierr); 3825 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3826 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3827 } else { 3828 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3829 } 3830 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3831 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3832 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3833 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3834 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3835 } 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3837 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3838 } 3839 3840 /* Matrix of coarse basis functions (local) */ 3841 if (pcbddc->coarse_phi_B) { 3842 PetscInt on_B,on_primal,on_D=n_D; 3843 if (pcbddc->coarse_phi_D) { 3844 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3845 } 3846 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3847 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3848 PetscScalar *marray; 3849 3850 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3851 ierr = PetscFree(marray);CHKERRQ(ierr); 3852 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3853 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3854 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3855 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3856 } 3857 } 3858 3859 if (!pcbddc->coarse_phi_B) { 3860 PetscScalar *marr; 3861 3862 /* memory size */ 3863 n = n_B*pcbddc->local_primal_size; 3864 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3865 if (!pcbddc->symmetric_primal) n *= 2; 3866 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3867 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3868 marr += n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3870 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3871 marr += n_D*pcbddc->local_primal_size; 3872 } 3873 if (!pcbddc->symmetric_primal) { 3874 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3875 marr += n_B*pcbddc->local_primal_size; 3876 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3877 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3878 } 3879 } else { 3880 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3881 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3882 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3883 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3884 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3885 } 3886 } 3887 } 3888 3889 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3890 p0_lidx_I = NULL; 3891 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3892 const PetscInt *idxs; 3893 3894 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3895 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3896 for (i=0;i<pcbddc->benign_n;i++) { 3897 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3898 } 3899 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 } 3901 3902 /* vertices */ 3903 if (n_vertices) { 3904 PetscBool restoreavr = PETSC_FALSE; 3905 3906 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3907 3908 if (n_R) { 3909 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3910 PetscBLASInt B_N,B_one = 1; 3911 PetscScalar *x,*y; 3912 3913 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3914 if (need_benign_correction) { 3915 ISLocalToGlobalMapping RtoN; 3916 IS is_p0; 3917 PetscInt *idxs_p0,n; 3918 3919 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3920 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3921 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3922 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3923 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3924 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3925 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3926 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3927 } 3928 3929 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3930 if (!sparserhs || need_benign_correction) { 3931 if (lda_rhs == n_R) { 3932 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3933 } else { 3934 PetscScalar *av,*array; 3935 const PetscInt *xadj,*adjncy; 3936 PetscInt n; 3937 PetscBool flg_row; 3938 3939 array = work+lda_rhs*n_vertices; 3940 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3941 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3942 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3943 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3944 for (i=0;i<n;i++) { 3945 PetscInt j; 3946 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3947 } 3948 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3949 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3950 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3951 } 3952 if (need_benign_correction) { 3953 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3954 PetscScalar *marr; 3955 3956 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3957 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3958 3959 | 0 0 0 | (V) 3960 L = | 0 0 -1 | (P-p0) 3961 | 0 0 -1 | (p0) 3962 3963 */ 3964 for (i=0;i<reuse_solver->benign_n;i++) { 3965 const PetscScalar *vals; 3966 const PetscInt *idxs,*idxs_zero; 3967 PetscInt n,j,nz; 3968 3969 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3970 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3971 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3972 for (j=0;j<n;j++) { 3973 PetscScalar val = vals[j]; 3974 PetscInt k,col = idxs[j]; 3975 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3976 } 3977 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3978 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3979 } 3980 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3981 } 3982 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3983 Brhs = A_RV; 3984 } else { 3985 Mat tA_RVT,A_RVT; 3986 3987 if (!pcbddc->symmetric_primal) { 3988 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3989 } else { 3990 restoreavr = PETSC_TRUE; 3991 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3992 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3993 A_RVT = A_VR; 3994 } 3995 if (lda_rhs != n_R) { 3996 PetscScalar *aa; 3997 PetscInt r,*ii,*jj; 3998 PetscBool done; 3999 4000 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4001 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4002 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4003 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4004 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4005 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4006 } else { 4007 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4008 tA_RVT = A_RVT; 4009 } 4010 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4011 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4012 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4013 } 4014 if (F) { 4015 /* need to correct the rhs */ 4016 if (need_benign_correction) { 4017 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4018 PetscScalar *marr; 4019 4020 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4021 if (lda_rhs != n_R) { 4022 for (i=0;i<n_vertices;i++) { 4023 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4024 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4025 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4026 } 4027 } else { 4028 for (i=0;i<n_vertices;i++) { 4029 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4030 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4031 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4032 } 4033 } 4034 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4035 } 4036 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4037 if (restoreavr) { 4038 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4039 } 4040 /* need to correct the solution */ 4041 if (need_benign_correction) { 4042 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4043 PetscScalar *marr; 4044 4045 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4046 if (lda_rhs != n_R) { 4047 for (i=0;i<n_vertices;i++) { 4048 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4049 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4050 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4051 } 4052 } else { 4053 for (i=0;i<n_vertices;i++) { 4054 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4055 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4056 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4057 } 4058 } 4059 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4060 } 4061 } else { 4062 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4063 for (i=0;i<n_vertices;i++) { 4064 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4065 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4066 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4067 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4068 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4069 } 4070 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4071 } 4072 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4073 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4074 /* S_VV and S_CV */ 4075 if (n_constraints) { 4076 Mat B; 4077 4078 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4079 for (i=0;i<n_vertices;i++) { 4080 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4081 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4082 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4083 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4084 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4085 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4086 } 4087 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4088 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4089 ierr = MatDestroy(&B);CHKERRQ(ierr); 4090 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4091 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4092 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4093 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4094 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4095 ierr = MatDestroy(&B);CHKERRQ(ierr); 4096 } 4097 if (lda_rhs != n_R) { 4098 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4099 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4100 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4101 } 4102 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4103 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4104 if (need_benign_correction) { 4105 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4106 PetscScalar *marr,*sums; 4107 4108 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4109 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4110 for (i=0;i<reuse_solver->benign_n;i++) { 4111 const PetscScalar *vals; 4112 const PetscInt *idxs,*idxs_zero; 4113 PetscInt n,j,nz; 4114 4115 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4116 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4117 for (j=0;j<n_vertices;j++) { 4118 PetscInt k; 4119 sums[j] = 0.; 4120 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4121 } 4122 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4123 for (j=0;j<n;j++) { 4124 PetscScalar val = vals[j]; 4125 PetscInt k; 4126 for (k=0;k<n_vertices;k++) { 4127 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4128 } 4129 } 4130 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4131 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4132 } 4133 ierr = PetscFree(sums);CHKERRQ(ierr); 4134 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4135 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4136 } 4137 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4138 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4139 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4140 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4141 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4142 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4143 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4144 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4145 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4146 } else { 4147 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4148 } 4149 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4150 4151 /* coarse basis functions */ 4152 for (i=0;i<n_vertices;i++) { 4153 PetscScalar *y; 4154 4155 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4156 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4157 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4158 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4159 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4160 y[n_B*i+idx_V_B[i]] = 1.0; 4161 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4162 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4163 4164 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4165 PetscInt j; 4166 4167 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4168 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4169 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4170 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4171 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4172 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4173 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4174 } 4175 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4176 } 4177 /* if n_R == 0 the object is not destroyed */ 4178 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4179 } 4180 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4181 4182 if (n_constraints) { 4183 Mat B; 4184 4185 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4186 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4187 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4188 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4189 if (n_vertices) { 4190 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4191 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4192 } else { 4193 Mat S_VCt; 4194 4195 if (lda_rhs != n_R) { 4196 ierr = MatDestroy(&B);CHKERRQ(ierr); 4197 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4198 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4199 } 4200 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4201 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4202 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4203 } 4204 } 4205 ierr = MatDestroy(&B);CHKERRQ(ierr); 4206 /* coarse basis functions */ 4207 for (i=0;i<n_constraints;i++) { 4208 PetscScalar *y; 4209 4210 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4211 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4212 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4213 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4214 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4215 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4216 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4217 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4218 PetscInt j; 4219 4220 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4221 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4222 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4223 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4224 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4225 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4226 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4227 } 4228 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4229 } 4230 } 4231 if (n_constraints) { 4232 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4233 } 4234 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4235 4236 /* coarse matrix entries relative to B_0 */ 4237 if (pcbddc->benign_n) { 4238 Mat B0_B,B0_BPHI; 4239 IS is_dummy; 4240 PetscScalar *data; 4241 PetscInt j; 4242 4243 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4244 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4245 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4246 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4247 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4248 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4249 for (j=0;j<pcbddc->benign_n;j++) { 4250 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4251 for (i=0;i<pcbddc->local_primal_size;i++) { 4252 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4253 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4254 } 4255 } 4256 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4257 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4258 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4259 } 4260 4261 /* compute other basis functions for non-symmetric problems */ 4262 if (!pcbddc->symmetric_primal) { 4263 Mat B_V=NULL,B_C=NULL; 4264 PetscScalar *marray; 4265 4266 if (n_constraints) { 4267 Mat S_CCT,C_CRT; 4268 4269 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4270 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4271 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4272 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4273 if (n_vertices) { 4274 Mat S_VCT; 4275 4276 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4277 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4278 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4279 } 4280 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4281 } else { 4282 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4283 } 4284 if (n_vertices && n_R) { 4285 PetscScalar *av,*marray; 4286 const PetscInt *xadj,*adjncy; 4287 PetscInt n; 4288 PetscBool flg_row; 4289 4290 /* B_V = B_V - A_VR^T */ 4291 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4292 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4293 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4294 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4295 for (i=0;i<n;i++) { 4296 PetscInt j; 4297 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4298 } 4299 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4300 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4301 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4302 } 4303 4304 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4305 if (n_vertices) { 4306 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4307 for (i=0;i<n_vertices;i++) { 4308 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4309 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4310 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4311 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4312 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4313 } 4314 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4315 } 4316 if (B_C) { 4317 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4318 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4319 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4320 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4321 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4322 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4323 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4324 } 4325 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4326 } 4327 /* coarse basis functions */ 4328 for (i=0;i<pcbddc->local_primal_size;i++) { 4329 PetscScalar *y; 4330 4331 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4332 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4333 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4334 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4335 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4336 if (i<n_vertices) { 4337 y[n_B*i+idx_V_B[i]] = 1.0; 4338 } 4339 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4340 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4341 4342 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4343 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4344 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4345 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4346 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4347 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4348 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4349 } 4350 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4351 } 4352 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4353 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4354 } 4355 4356 /* free memory */ 4357 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4358 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4359 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4360 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4361 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4362 ierr = PetscFree(work);CHKERRQ(ierr); 4363 if (n_vertices) { 4364 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4365 } 4366 if (n_constraints) { 4367 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4368 } 4369 /* Checking coarse_sub_mat and coarse basis functios */ 4370 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4371 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4372 if (pcbddc->dbg_flag) { 4373 Mat coarse_sub_mat; 4374 Mat AUXMAT,TM1,TM2,TM3,TM4; 4375 Mat coarse_phi_D,coarse_phi_B; 4376 Mat coarse_psi_D,coarse_psi_B; 4377 Mat A_II,A_BB,A_IB,A_BI; 4378 Mat C_B,CPHI; 4379 IS is_dummy; 4380 Vec mones; 4381 MatType checkmattype=MATSEQAIJ; 4382 PetscReal real_value; 4383 4384 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4385 Mat A; 4386 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4387 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4388 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4389 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4390 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4391 ierr = MatDestroy(&A);CHKERRQ(ierr); 4392 } else { 4393 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4394 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4395 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4396 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4397 } 4398 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4399 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4400 if (!pcbddc->symmetric_primal) { 4401 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4402 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4403 } 4404 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4405 4406 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4407 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4408 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4409 if (!pcbddc->symmetric_primal) { 4410 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4411 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4412 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4413 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4414 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4415 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4416 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4417 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4418 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4419 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4420 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4421 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4422 } else { 4423 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4424 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4425 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4426 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4427 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4428 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4429 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4430 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4431 } 4432 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4433 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4434 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4435 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4436 if (pcbddc->benign_n) { 4437 Mat B0_B,B0_BPHI; 4438 PetscScalar *data,*data2; 4439 PetscInt j; 4440 4441 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4442 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4443 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4444 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4445 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4446 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4447 for (j=0;j<pcbddc->benign_n;j++) { 4448 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4449 for (i=0;i<pcbddc->local_primal_size;i++) { 4450 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4451 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4452 } 4453 } 4454 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4455 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4456 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4457 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4458 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4459 } 4460 #if 0 4461 { 4462 PetscViewer viewer; 4463 char filename[256]; 4464 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4465 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4466 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4467 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4468 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4469 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4470 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4471 if (pcbddc->coarse_phi_B) { 4472 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4473 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4474 } 4475 if (pcbddc->coarse_phi_D) { 4476 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4477 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4478 } 4479 if (pcbddc->coarse_psi_B) { 4480 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4481 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4482 } 4483 if (pcbddc->coarse_psi_D) { 4484 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4485 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4486 } 4487 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4488 } 4489 #endif 4490 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4491 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4492 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4493 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4494 4495 /* check constraints */ 4496 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4497 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4498 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4499 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4500 } else { 4501 PetscScalar *data; 4502 Mat tmat; 4503 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4504 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4505 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4506 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4507 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4508 } 4509 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4510 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4511 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4512 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4513 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4514 if (!pcbddc->symmetric_primal) { 4515 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4516 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4517 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4518 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4519 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4520 } 4521 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4522 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4523 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4524 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4525 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4526 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4527 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4528 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4529 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4530 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4531 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4532 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4533 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4534 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4535 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4536 if (!pcbddc->symmetric_primal) { 4537 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4538 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4539 } 4540 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4541 } 4542 /* get back data */ 4543 *coarse_submat_vals_n = coarse_submat_vals; 4544 PetscFunctionReturn(0); 4545 } 4546 4547 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4548 { 4549 Mat *work_mat; 4550 IS isrow_s,iscol_s; 4551 PetscBool rsorted,csorted; 4552 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4553 PetscErrorCode ierr; 4554 4555 PetscFunctionBegin; 4556 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4557 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4558 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4559 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4560 4561 if (!rsorted) { 4562 const PetscInt *idxs; 4563 PetscInt *idxs_sorted,i; 4564 4565 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4566 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4567 for (i=0;i<rsize;i++) { 4568 idxs_perm_r[i] = i; 4569 } 4570 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4571 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4572 for (i=0;i<rsize;i++) { 4573 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4574 } 4575 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4576 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4577 } else { 4578 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4579 isrow_s = isrow; 4580 } 4581 4582 if (!csorted) { 4583 if (isrow == iscol) { 4584 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4585 iscol_s = isrow_s; 4586 } else { 4587 const PetscInt *idxs; 4588 PetscInt *idxs_sorted,i; 4589 4590 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4591 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4592 for (i=0;i<csize;i++) { 4593 idxs_perm_c[i] = i; 4594 } 4595 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4596 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4597 for (i=0;i<csize;i++) { 4598 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4599 } 4600 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4601 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4602 } 4603 } else { 4604 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4605 iscol_s = iscol; 4606 } 4607 4608 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4609 4610 if (!rsorted || !csorted) { 4611 Mat new_mat; 4612 IS is_perm_r,is_perm_c; 4613 4614 if (!rsorted) { 4615 PetscInt *idxs_r,i; 4616 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4617 for (i=0;i<rsize;i++) { 4618 idxs_r[idxs_perm_r[i]] = i; 4619 } 4620 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4621 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4622 } else { 4623 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4624 } 4625 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4626 4627 if (!csorted) { 4628 if (isrow_s == iscol_s) { 4629 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4630 is_perm_c = is_perm_r; 4631 } else { 4632 PetscInt *idxs_c,i; 4633 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4634 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4635 for (i=0;i<csize;i++) { 4636 idxs_c[idxs_perm_c[i]] = i; 4637 } 4638 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4639 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4640 } 4641 } else { 4642 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4643 } 4644 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4645 4646 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4647 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4648 work_mat[0] = new_mat; 4649 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4650 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4651 } 4652 4653 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4654 *B = work_mat[0]; 4655 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4656 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4657 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4658 PetscFunctionReturn(0); 4659 } 4660 4661 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4662 { 4663 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4664 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4665 Mat new_mat,lA; 4666 IS is_local,is_global; 4667 PetscInt local_size; 4668 PetscBool isseqaij; 4669 PetscErrorCode ierr; 4670 4671 PetscFunctionBegin; 4672 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4673 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4674 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4675 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4676 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4677 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4678 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4679 4680 /* check */ 4681 if (pcbddc->dbg_flag) { 4682 Vec x,x_change; 4683 PetscReal error; 4684 4685 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4686 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4687 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4688 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4689 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4690 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4691 if (!pcbddc->change_interior) { 4692 const PetscScalar *x,*y,*v; 4693 PetscReal lerror = 0.; 4694 PetscInt i; 4695 4696 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4697 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4698 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4699 for (i=0;i<local_size;i++) 4700 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4701 lerror = PetscAbsScalar(x[i]-y[i]); 4702 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4703 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4704 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4705 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4706 if (error > PETSC_SMALL) { 4707 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4708 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4709 } else { 4710 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4711 } 4712 } 4713 } 4714 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4715 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4716 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4717 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4718 if (error > PETSC_SMALL) { 4719 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4720 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4721 } else { 4722 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4723 } 4724 } 4725 ierr = VecDestroy(&x);CHKERRQ(ierr); 4726 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4727 } 4728 4729 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4730 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4731 4732 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4733 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4734 if (isseqaij) { 4735 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4736 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4737 if (lA) { 4738 Mat work; 4739 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4740 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4741 ierr = MatDestroy(&work);CHKERRQ(ierr); 4742 } 4743 } else { 4744 Mat work_mat; 4745 4746 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4747 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4748 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4749 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4750 if (lA) { 4751 Mat work; 4752 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4753 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4754 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4755 ierr = MatDestroy(&work);CHKERRQ(ierr); 4756 } 4757 } 4758 if (matis->A->symmetric_set) { 4759 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4760 #if !defined(PETSC_USE_COMPLEX) 4761 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4762 #endif 4763 } 4764 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4765 PetscFunctionReturn(0); 4766 } 4767 4768 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4769 { 4770 PC_IS* pcis = (PC_IS*)(pc->data); 4771 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4772 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4773 PetscInt *idx_R_local=NULL; 4774 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4775 PetscInt vbs,bs; 4776 PetscBT bitmask=NULL; 4777 PetscErrorCode ierr; 4778 4779 PetscFunctionBegin; 4780 /* 4781 No need to setup local scatters if 4782 - primal space is unchanged 4783 AND 4784 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4785 AND 4786 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4787 */ 4788 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4789 PetscFunctionReturn(0); 4790 } 4791 /* destroy old objects */ 4792 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4793 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4794 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4795 /* Set Non-overlapping dimensions */ 4796 n_B = pcis->n_B; 4797 n_D = pcis->n - n_B; 4798 n_vertices = pcbddc->n_vertices; 4799 4800 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4801 4802 /* create auxiliary bitmask and allocate workspace */ 4803 if (!sub_schurs || !sub_schurs->reuse_solver) { 4804 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4805 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4806 for (i=0;i<n_vertices;i++) { 4807 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4808 } 4809 4810 for (i=0, n_R=0; i<pcis->n; i++) { 4811 if (!PetscBTLookup(bitmask,i)) { 4812 idx_R_local[n_R++] = i; 4813 } 4814 } 4815 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4816 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4817 4818 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4819 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4820 } 4821 4822 /* Block code */ 4823 vbs = 1; 4824 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4825 if (bs>1 && !(n_vertices%bs)) { 4826 PetscBool is_blocked = PETSC_TRUE; 4827 PetscInt *vary; 4828 if (!sub_schurs || !sub_schurs->reuse_solver) { 4829 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4830 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4831 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4832 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4833 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4834 for (i=0; i<pcis->n/bs; i++) { 4835 if (vary[i]!=0 && vary[i]!=bs) { 4836 is_blocked = PETSC_FALSE; 4837 break; 4838 } 4839 } 4840 ierr = PetscFree(vary);CHKERRQ(ierr); 4841 } else { 4842 /* Verify directly the R set */ 4843 for (i=0; i<n_R/bs; i++) { 4844 PetscInt j,node=idx_R_local[bs*i]; 4845 for (j=1; j<bs; j++) { 4846 if (node != idx_R_local[bs*i+j]-j) { 4847 is_blocked = PETSC_FALSE; 4848 break; 4849 } 4850 } 4851 } 4852 } 4853 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4854 vbs = bs; 4855 for (i=0;i<n_R/vbs;i++) { 4856 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4857 } 4858 } 4859 } 4860 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4861 if (sub_schurs && sub_schurs->reuse_solver) { 4862 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4863 4864 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4865 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4866 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4867 reuse_solver->is_R = pcbddc->is_R_local; 4868 } else { 4869 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4870 } 4871 4872 /* print some info if requested */ 4873 if (pcbddc->dbg_flag) { 4874 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4875 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4876 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4877 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4878 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4879 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4881 } 4882 4883 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4884 if (!sub_schurs || !sub_schurs->reuse_solver) { 4885 IS is_aux1,is_aux2; 4886 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4887 4888 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4889 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4890 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4891 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4892 for (i=0; i<n_D; i++) { 4893 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4894 } 4895 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4896 for (i=0, j=0; i<n_R; i++) { 4897 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4898 aux_array1[j++] = i; 4899 } 4900 } 4901 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4902 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4903 for (i=0, j=0; i<n_B; i++) { 4904 if (!PetscBTLookup(bitmask,is_indices[i])) { 4905 aux_array2[j++] = i; 4906 } 4907 } 4908 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4909 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4910 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4911 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4912 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4913 4914 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4915 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4916 for (i=0, j=0; i<n_R; i++) { 4917 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4918 aux_array1[j++] = i; 4919 } 4920 } 4921 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4922 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4923 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4924 } 4925 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4926 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4927 } else { 4928 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4929 IS tis; 4930 PetscInt schur_size; 4931 4932 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4933 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4934 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4935 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4936 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4937 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4938 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4939 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4940 } 4941 } 4942 PetscFunctionReturn(0); 4943 } 4944 4945 4946 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4947 { 4948 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4949 PC_IS *pcis = (PC_IS*)pc->data; 4950 PC pc_temp; 4951 Mat A_RR; 4952 MatReuse reuse; 4953 PetscScalar m_one = -1.0; 4954 PetscReal value; 4955 PetscInt n_D,n_R; 4956 PetscBool check_corr,issbaij; 4957 PetscErrorCode ierr; 4958 /* prefixes stuff */ 4959 char dir_prefix[256],neu_prefix[256],str_level[16]; 4960 size_t len; 4961 4962 PetscFunctionBegin; 4963 4964 /* compute prefixes */ 4965 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4966 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4967 if (!pcbddc->current_level) { 4968 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4969 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4970 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4971 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4972 } else { 4973 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4974 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4975 len -= 15; /* remove "pc_bddc_coarse_" */ 4976 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4977 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4978 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4979 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4980 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4981 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4982 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4983 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4984 } 4985 4986 /* DIRICHLET PROBLEM */ 4987 if (dirichlet) { 4988 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4989 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4990 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4991 if (pcbddc->dbg_flag) { 4992 Mat A_IIn; 4993 4994 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 4995 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 4996 pcis->A_II = A_IIn; 4997 } 4998 } 4999 if (pcbddc->local_mat->symmetric_set) { 5000 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5001 } 5002 /* Matrix for Dirichlet problem is pcis->A_II */ 5003 n_D = pcis->n - pcis->n_B; 5004 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5005 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5006 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5007 /* default */ 5008 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5009 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5010 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5011 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5012 if (issbaij) { 5013 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5014 } else { 5015 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5016 } 5017 /* Allow user's customization */ 5018 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5019 } 5020 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5021 if (sub_schurs && sub_schurs->reuse_solver) { 5022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5023 5024 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5025 } 5026 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5027 if (!n_D) { 5028 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5029 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5030 } 5031 /* Set Up KSP for Dirichlet problem of BDDC */ 5032 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5033 /* set ksp_D into pcis data */ 5034 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5035 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5036 pcis->ksp_D = pcbddc->ksp_D; 5037 } 5038 5039 /* NEUMANN PROBLEM */ 5040 A_RR = 0; 5041 if (neumann) { 5042 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5043 PetscInt ibs,mbs; 5044 PetscBool issbaij, reuse_neumann_solver; 5045 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5046 5047 reuse_neumann_solver = PETSC_FALSE; 5048 if (sub_schurs && sub_schurs->reuse_solver) { 5049 IS iP; 5050 5051 reuse_neumann_solver = PETSC_TRUE; 5052 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5053 if (iP) reuse_neumann_solver = PETSC_FALSE; 5054 } 5055 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5056 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5057 if (pcbddc->ksp_R) { /* already created ksp */ 5058 PetscInt nn_R; 5059 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5060 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5061 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5062 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5063 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5064 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5065 reuse = MAT_INITIAL_MATRIX; 5066 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5067 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5068 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5069 reuse = MAT_INITIAL_MATRIX; 5070 } else { /* safe to reuse the matrix */ 5071 reuse = MAT_REUSE_MATRIX; 5072 } 5073 } 5074 /* last check */ 5075 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5076 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5077 reuse = MAT_INITIAL_MATRIX; 5078 } 5079 } else { /* first time, so we need to create the matrix */ 5080 reuse = MAT_INITIAL_MATRIX; 5081 } 5082 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5083 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5084 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5085 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5086 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5087 if (matis->A == pcbddc->local_mat) { 5088 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5089 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5090 } else { 5091 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5092 } 5093 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5094 if (matis->A == pcbddc->local_mat) { 5095 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5096 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5097 } else { 5098 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5099 } 5100 } 5101 /* extract A_RR */ 5102 if (reuse_neumann_solver) { 5103 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5104 5105 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5106 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5107 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5108 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5109 } else { 5110 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5111 } 5112 } else { 5113 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5114 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5115 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5116 } 5117 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5118 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5119 } 5120 if (pcbddc->local_mat->symmetric_set) { 5121 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5122 } 5123 if (!pcbddc->ksp_R) { /* create object if not present */ 5124 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5125 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5126 /* default */ 5127 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5128 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5129 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5130 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5131 if (issbaij) { 5132 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5133 } else { 5134 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5135 } 5136 /* Allow user's customization */ 5137 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5138 } 5139 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5140 if (!n_R) { 5141 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5142 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5143 } 5144 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5145 /* Reuse solver if it is present */ 5146 if (reuse_neumann_solver) { 5147 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5148 5149 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5150 } 5151 /* Set Up KSP for Neumann problem of BDDC */ 5152 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5153 } 5154 5155 if (pcbddc->dbg_flag) { 5156 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5157 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5158 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5159 } 5160 5161 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5162 check_corr = PETSC_FALSE; 5163 if (pcbddc->NullSpace_corr[0]) { 5164 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5165 } 5166 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5167 check_corr = PETSC_TRUE; 5168 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5169 } 5170 if (neumann && pcbddc->NullSpace_corr[2]) { 5171 check_corr = PETSC_TRUE; 5172 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5173 } 5174 /* check Dirichlet and Neumann solvers */ 5175 if (pcbddc->dbg_flag) { 5176 if (dirichlet) { /* Dirichlet */ 5177 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5178 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5179 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5180 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5181 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5182 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5183 if (check_corr) { 5184 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5185 } 5186 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5187 } 5188 if (neumann) { /* Neumann */ 5189 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5190 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5191 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5192 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5193 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5194 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5195 if (check_corr) { 5196 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5197 } 5198 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5199 } 5200 } 5201 /* free Neumann problem's matrix */ 5202 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5203 PetscFunctionReturn(0); 5204 } 5205 5206 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5207 { 5208 PetscErrorCode ierr; 5209 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5210 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5211 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5212 5213 PetscFunctionBegin; 5214 if (!reuse_solver) { 5215 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5216 } 5217 if (!pcbddc->switch_static) { 5218 if (applytranspose && pcbddc->local_auxmat1) { 5219 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5220 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5221 } 5222 if (!reuse_solver) { 5223 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5224 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5225 } else { 5226 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5227 5228 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5229 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5230 } 5231 } else { 5232 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5233 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5234 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5235 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5236 if (applytranspose && pcbddc->local_auxmat1) { 5237 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5238 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5239 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5241 } 5242 } 5243 if (!reuse_solver || pcbddc->switch_static) { 5244 if (applytranspose) { 5245 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5246 } else { 5247 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5248 } 5249 } else { 5250 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5251 5252 if (applytranspose) { 5253 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5254 } else { 5255 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5256 } 5257 } 5258 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5259 if (!pcbddc->switch_static) { 5260 if (!reuse_solver) { 5261 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5262 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5263 } else { 5264 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5265 5266 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5267 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5268 } 5269 if (!applytranspose && pcbddc->local_auxmat1) { 5270 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5271 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5272 } 5273 } else { 5274 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5275 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5276 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5277 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5278 if (!applytranspose && pcbddc->local_auxmat1) { 5279 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5280 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5281 } 5282 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5283 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5284 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5285 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5286 } 5287 PetscFunctionReturn(0); 5288 } 5289 5290 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5291 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5292 { 5293 PetscErrorCode ierr; 5294 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5295 PC_IS* pcis = (PC_IS*) (pc->data); 5296 const PetscScalar zero = 0.0; 5297 5298 PetscFunctionBegin; 5299 PetscBool ss = PETSC_FALSE; 5300 ierr = PetscOptionsGetBool(NULL,NULL,"-swap",&ss,NULL);CHKERRQ(ierr); 5301 if (ss) { 5302 Mat save_B = pcbddc->coarse_phi_B; 5303 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5304 pcbddc->coarse_psi_B = save_B; 5305 Mat save_D = pcbddc->coarse_phi_D; 5306 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5307 pcbddc->coarse_psi_D = save_D; 5308 } 5309 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5310 if (!pcbddc->benign_apply_coarse_only) { 5311 if (applytranspose) { 5312 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5313 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5314 } else { 5315 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5316 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5317 } 5318 } else { 5319 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5320 } 5321 5322 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5323 if (pcbddc->benign_n) { 5324 PetscScalar *array; 5325 PetscInt j; 5326 5327 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5328 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5329 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5330 } 5331 5332 /* start communications from local primal nodes to rhs of coarse solver */ 5333 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5334 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5335 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5336 5337 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5338 if (pcbddc->coarse_ksp) { 5339 Mat coarse_mat; 5340 Vec rhs,sol; 5341 MatNullSpace nullsp; 5342 PetscBool isbddc = PETSC_FALSE; 5343 5344 if (pcbddc->benign_have_null) { 5345 PC coarse_pc; 5346 5347 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5348 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5349 /* we need to propagate to coarser levels the need for a possible benign correction */ 5350 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5351 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5352 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5353 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5354 } 5355 } 5356 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5357 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5358 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5359 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5360 if (nullsp) { 5361 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5362 } 5363 if (applytranspose) { 5364 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5365 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5366 } else { 5367 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5368 PC coarse_pc; 5369 5370 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5371 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5372 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5373 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5374 } else { 5375 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5376 } 5377 } 5378 /* we don't need the benign correction at coarser levels anymore */ 5379 if (pcbddc->benign_have_null && isbddc) { 5380 PC coarse_pc; 5381 PC_BDDC* coarsepcbddc; 5382 5383 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5384 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5385 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5386 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5387 } 5388 if (nullsp) { 5389 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5390 } 5391 } 5392 5393 /* Local solution on R nodes */ 5394 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5395 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5396 } 5397 /* communications from coarse sol to local primal nodes */ 5398 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5399 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5400 5401 /* Sum contributions from the two levels */ 5402 if (!pcbddc->benign_apply_coarse_only) { 5403 if (applytranspose) { 5404 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5405 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5406 } else { 5407 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5408 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5409 } 5410 /* store p0 */ 5411 if (pcbddc->benign_n) { 5412 PetscScalar *array; 5413 PetscInt j; 5414 5415 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5416 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5417 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5418 } 5419 } else { /* expand the coarse solution */ 5420 if (applytranspose) { 5421 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5422 } else { 5423 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5424 } 5425 } 5426 if (ss) { 5427 Mat save_B = pcbddc->coarse_phi_B; 5428 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5429 pcbddc->coarse_psi_B = save_B; 5430 Mat save_D = pcbddc->coarse_phi_D; 5431 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5432 pcbddc->coarse_psi_D = save_D; 5433 } 5434 PetscFunctionReturn(0); 5435 } 5436 5437 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5438 { 5439 PetscErrorCode ierr; 5440 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5441 PetscScalar *array; 5442 Vec from,to; 5443 5444 PetscFunctionBegin; 5445 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5446 from = pcbddc->coarse_vec; 5447 to = pcbddc->vec1_P; 5448 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5449 Vec tvec; 5450 5451 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5452 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5453 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5454 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5455 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5456 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5457 } 5458 } else { /* from local to global -> put data in coarse right hand side */ 5459 from = pcbddc->vec1_P; 5460 to = pcbddc->coarse_vec; 5461 } 5462 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5463 PetscFunctionReturn(0); 5464 } 5465 5466 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5467 { 5468 PetscErrorCode ierr; 5469 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5470 PetscScalar *array; 5471 Vec from,to; 5472 5473 PetscFunctionBegin; 5474 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5475 from = pcbddc->coarse_vec; 5476 to = pcbddc->vec1_P; 5477 } else { /* from local to global -> put data in coarse right hand side */ 5478 from = pcbddc->vec1_P; 5479 to = pcbddc->coarse_vec; 5480 } 5481 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5482 if (smode == SCATTER_FORWARD) { 5483 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5484 Vec tvec; 5485 5486 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5487 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5488 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5489 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5490 } 5491 } else { 5492 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5493 ierr = VecResetArray(from);CHKERRQ(ierr); 5494 } 5495 } 5496 PetscFunctionReturn(0); 5497 } 5498 5499 /* uncomment for testing purposes */ 5500 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5501 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5502 { 5503 PetscErrorCode ierr; 5504 PC_IS* pcis = (PC_IS*)(pc->data); 5505 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5506 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5507 /* one and zero */ 5508 PetscScalar one=1.0,zero=0.0; 5509 /* space to store constraints and their local indices */ 5510 PetscScalar *constraints_data; 5511 PetscInt *constraints_idxs,*constraints_idxs_B; 5512 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5513 PetscInt *constraints_n; 5514 /* iterators */ 5515 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5516 /* BLAS integers */ 5517 PetscBLASInt lwork,lierr; 5518 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5519 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5520 /* reuse */ 5521 PetscInt olocal_primal_size,olocal_primal_size_cc; 5522 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5523 /* change of basis */ 5524 PetscBool qr_needed; 5525 PetscBT change_basis,qr_needed_idx; 5526 /* auxiliary stuff */ 5527 PetscInt *nnz,*is_indices; 5528 PetscInt ncc; 5529 /* some quantities */ 5530 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5531 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5532 5533 PetscFunctionBegin; 5534 /* Destroy Mat objects computed previously */ 5535 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5536 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5537 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5538 /* save info on constraints from previous setup (if any) */ 5539 olocal_primal_size = pcbddc->local_primal_size; 5540 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5541 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5542 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5543 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5544 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5545 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5546 5547 if (!pcbddc->adaptive_selection) { 5548 IS ISForVertices,*ISForFaces,*ISForEdges; 5549 MatNullSpace nearnullsp; 5550 const Vec *nearnullvecs; 5551 Vec *localnearnullsp; 5552 PetscScalar *array; 5553 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5554 PetscBool nnsp_has_cnst; 5555 /* LAPACK working arrays for SVD or POD */ 5556 PetscBool skip_lapack,boolforchange; 5557 PetscScalar *work; 5558 PetscReal *singular_vals; 5559 #if defined(PETSC_USE_COMPLEX) 5560 PetscReal *rwork; 5561 #endif 5562 #if defined(PETSC_MISSING_LAPACK_GESVD) 5563 PetscScalar *temp_basis,*correlation_mat; 5564 #else 5565 PetscBLASInt dummy_int=1; 5566 PetscScalar dummy_scalar=1.; 5567 #endif 5568 5569 /* Get index sets for faces, edges and vertices from graph */ 5570 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5571 /* print some info */ 5572 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5573 PetscInt nv; 5574 5575 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5576 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5577 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5580 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5582 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5583 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5584 } 5585 5586 /* free unneeded index sets */ 5587 if (!pcbddc->use_vertices) { 5588 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5589 } 5590 if (!pcbddc->use_edges) { 5591 for (i=0;i<n_ISForEdges;i++) { 5592 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5593 } 5594 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5595 n_ISForEdges = 0; 5596 } 5597 if (!pcbddc->use_faces) { 5598 for (i=0;i<n_ISForFaces;i++) { 5599 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5600 } 5601 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5602 n_ISForFaces = 0; 5603 } 5604 5605 /* check if near null space is attached to global mat */ 5606 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5607 if (nearnullsp) { 5608 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5609 /* remove any stored info */ 5610 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5611 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5612 /* store information for BDDC solver reuse */ 5613 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5614 pcbddc->onearnullspace = nearnullsp; 5615 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5616 for (i=0;i<nnsp_size;i++) { 5617 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5618 } 5619 } else { /* if near null space is not provided BDDC uses constants by default */ 5620 nnsp_size = 0; 5621 nnsp_has_cnst = PETSC_TRUE; 5622 } 5623 /* get max number of constraints on a single cc */ 5624 max_constraints = nnsp_size; 5625 if (nnsp_has_cnst) max_constraints++; 5626 5627 /* 5628 Evaluate maximum storage size needed by the procedure 5629 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5630 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5631 There can be multiple constraints per connected component 5632 */ 5633 n_vertices = 0; 5634 if (ISForVertices) { 5635 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5636 } 5637 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5638 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5639 5640 total_counts = n_ISForFaces+n_ISForEdges; 5641 total_counts *= max_constraints; 5642 total_counts += n_vertices; 5643 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5644 5645 total_counts = 0; 5646 max_size_of_constraint = 0; 5647 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5648 IS used_is; 5649 if (i<n_ISForEdges) { 5650 used_is = ISForEdges[i]; 5651 } else { 5652 used_is = ISForFaces[i-n_ISForEdges]; 5653 } 5654 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5655 total_counts += j; 5656 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5657 } 5658 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5659 5660 /* get local part of global near null space vectors */ 5661 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5662 for (k=0;k<nnsp_size;k++) { 5663 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5664 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5665 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5666 } 5667 5668 /* whether or not to skip lapack calls */ 5669 skip_lapack = PETSC_TRUE; 5670 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5671 5672 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5673 if (!skip_lapack) { 5674 PetscScalar temp_work; 5675 5676 #if defined(PETSC_MISSING_LAPACK_GESVD) 5677 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5678 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5679 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5680 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5681 #if defined(PETSC_USE_COMPLEX) 5682 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5683 #endif 5684 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5685 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5686 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5687 lwork = -1; 5688 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5689 #if !defined(PETSC_USE_COMPLEX) 5690 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5691 #else 5692 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5693 #endif 5694 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5695 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5696 #else /* on missing GESVD */ 5697 /* SVD */ 5698 PetscInt max_n,min_n; 5699 max_n = max_size_of_constraint; 5700 min_n = max_constraints; 5701 if (max_size_of_constraint < max_constraints) { 5702 min_n = max_size_of_constraint; 5703 max_n = max_constraints; 5704 } 5705 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5706 #if defined(PETSC_USE_COMPLEX) 5707 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5708 #endif 5709 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5710 lwork = -1; 5711 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5712 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5713 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5714 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5715 #if !defined(PETSC_USE_COMPLEX) 5716 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5717 #else 5718 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5719 #endif 5720 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5721 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5722 #endif /* on missing GESVD */ 5723 /* Allocate optimal workspace */ 5724 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5725 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5726 } 5727 /* Now we can loop on constraining sets */ 5728 total_counts = 0; 5729 constraints_idxs_ptr[0] = 0; 5730 constraints_data_ptr[0] = 0; 5731 /* vertices */ 5732 if (n_vertices) { 5733 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5734 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5735 for (i=0;i<n_vertices;i++) { 5736 constraints_n[total_counts] = 1; 5737 constraints_data[total_counts] = 1.0; 5738 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5739 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5740 total_counts++; 5741 } 5742 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5743 n_vertices = total_counts; 5744 } 5745 5746 /* edges and faces */ 5747 total_counts_cc = total_counts; 5748 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5749 IS used_is; 5750 PetscBool idxs_copied = PETSC_FALSE; 5751 5752 if (ncc<n_ISForEdges) { 5753 used_is = ISForEdges[ncc]; 5754 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5755 } else { 5756 used_is = ISForFaces[ncc-n_ISForEdges]; 5757 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5758 } 5759 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5760 5761 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5762 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5763 /* change of basis should not be performed on local periodic nodes */ 5764 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5765 if (nnsp_has_cnst) { 5766 PetscScalar quad_value; 5767 5768 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5769 idxs_copied = PETSC_TRUE; 5770 5771 if (!pcbddc->use_nnsp_true) { 5772 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5773 } else { 5774 quad_value = 1.0; 5775 } 5776 for (j=0;j<size_of_constraint;j++) { 5777 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5778 } 5779 temp_constraints++; 5780 total_counts++; 5781 } 5782 for (k=0;k<nnsp_size;k++) { 5783 PetscReal real_value; 5784 PetscScalar *ptr_to_data; 5785 5786 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5787 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5788 for (j=0;j<size_of_constraint;j++) { 5789 ptr_to_data[j] = array[is_indices[j]]; 5790 } 5791 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5792 /* check if array is null on the connected component */ 5793 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5794 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5795 if (real_value > 0.0) { /* keep indices and values */ 5796 temp_constraints++; 5797 total_counts++; 5798 if (!idxs_copied) { 5799 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5800 idxs_copied = PETSC_TRUE; 5801 } 5802 } 5803 } 5804 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5805 valid_constraints = temp_constraints; 5806 if (!pcbddc->use_nnsp_true && temp_constraints) { 5807 if (temp_constraints == 1) { /* just normalize the constraint */ 5808 PetscScalar norm,*ptr_to_data; 5809 5810 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5811 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5812 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5813 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5814 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5815 } else { /* perform SVD */ 5816 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5817 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5818 5819 #if defined(PETSC_MISSING_LAPACK_GESVD) 5820 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5821 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5822 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5823 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5824 from that computed using LAPACKgesvd 5825 -> This is due to a different computation of eigenvectors in LAPACKheev 5826 -> The quality of the POD-computed basis will be the same */ 5827 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5828 /* Store upper triangular part of correlation matrix */ 5829 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5830 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5831 for (j=0;j<temp_constraints;j++) { 5832 for (k=0;k<j+1;k++) { 5833 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5834 } 5835 } 5836 /* compute eigenvalues and eigenvectors of correlation matrix */ 5837 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5838 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5839 #if !defined(PETSC_USE_COMPLEX) 5840 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5841 #else 5842 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5843 #endif 5844 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5845 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5846 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5847 j = 0; 5848 while (j < temp_constraints && singular_vals[j] < tol) j++; 5849 total_counts = total_counts-j; 5850 valid_constraints = temp_constraints-j; 5851 /* scale and copy POD basis into used quadrature memory */ 5852 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5853 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5854 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5855 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5856 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5858 if (j<temp_constraints) { 5859 PetscInt ii; 5860 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5861 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5862 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5863 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5864 for (k=0;k<temp_constraints-j;k++) { 5865 for (ii=0;ii<size_of_constraint;ii++) { 5866 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5867 } 5868 } 5869 } 5870 #else /* on missing GESVD */ 5871 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5872 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5873 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5874 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5875 #if !defined(PETSC_USE_COMPLEX) 5876 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5877 #else 5878 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5879 #endif 5880 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5881 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5882 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5883 k = temp_constraints; 5884 if (k > size_of_constraint) k = size_of_constraint; 5885 j = 0; 5886 while (j < k && singular_vals[k-j-1] < tol) j++; 5887 valid_constraints = k-j; 5888 total_counts = total_counts-temp_constraints+valid_constraints; 5889 #endif /* on missing GESVD */ 5890 } 5891 } 5892 /* update pointers information */ 5893 if (valid_constraints) { 5894 constraints_n[total_counts_cc] = valid_constraints; 5895 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5896 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5897 /* set change_of_basis flag */ 5898 if (boolforchange) { 5899 PetscBTSet(change_basis,total_counts_cc); 5900 } 5901 total_counts_cc++; 5902 } 5903 } 5904 /* free workspace */ 5905 if (!skip_lapack) { 5906 ierr = PetscFree(work);CHKERRQ(ierr); 5907 #if defined(PETSC_USE_COMPLEX) 5908 ierr = PetscFree(rwork);CHKERRQ(ierr); 5909 #endif 5910 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5911 #if defined(PETSC_MISSING_LAPACK_GESVD) 5912 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5913 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5914 #endif 5915 } 5916 for (k=0;k<nnsp_size;k++) { 5917 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5918 } 5919 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5920 /* free index sets of faces, edges and vertices */ 5921 for (i=0;i<n_ISForFaces;i++) { 5922 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5923 } 5924 if (n_ISForFaces) { 5925 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5926 } 5927 for (i=0;i<n_ISForEdges;i++) { 5928 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5929 } 5930 if (n_ISForEdges) { 5931 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5932 } 5933 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5934 } else { 5935 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5936 5937 total_counts = 0; 5938 n_vertices = 0; 5939 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5940 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5941 } 5942 max_constraints = 0; 5943 total_counts_cc = 0; 5944 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5945 total_counts += pcbddc->adaptive_constraints_n[i]; 5946 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5947 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5948 } 5949 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5950 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5951 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5952 constraints_data = pcbddc->adaptive_constraints_data; 5953 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5954 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5955 total_counts_cc = 0; 5956 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5957 if (pcbddc->adaptive_constraints_n[i]) { 5958 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5959 } 5960 } 5961 #if 0 5962 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5963 for (i=0;i<total_counts_cc;i++) { 5964 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5965 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5966 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5967 printf(" %d",constraints_idxs[j]); 5968 } 5969 printf("\n"); 5970 printf("number of cc: %d\n",constraints_n[i]); 5971 } 5972 for (i=0;i<n_vertices;i++) { 5973 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5974 } 5975 for (i=0;i<sub_schurs->n_subs;i++) { 5976 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5977 } 5978 #endif 5979 5980 max_size_of_constraint = 0; 5981 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5982 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5983 /* Change of basis */ 5984 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5985 if (pcbddc->use_change_of_basis) { 5986 for (i=0;i<sub_schurs->n_subs;i++) { 5987 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5988 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5989 } 5990 } 5991 } 5992 } 5993 pcbddc->local_primal_size = total_counts; 5994 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5995 5996 /* map constraints_idxs in boundary numbering */ 5997 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 5998 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 5999 6000 /* Create constraint matrix */ 6001 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6002 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6003 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6004 6005 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6006 /* determine if a QR strategy is needed for change of basis */ 6007 qr_needed = PETSC_FALSE; 6008 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6009 total_primal_vertices=0; 6010 pcbddc->local_primal_size_cc = 0; 6011 for (i=0;i<total_counts_cc;i++) { 6012 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6013 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6014 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6015 pcbddc->local_primal_size_cc += 1; 6016 } else if (PetscBTLookup(change_basis,i)) { 6017 for (k=0;k<constraints_n[i];k++) { 6018 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6019 } 6020 pcbddc->local_primal_size_cc += constraints_n[i]; 6021 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6022 PetscBTSet(qr_needed_idx,i); 6023 qr_needed = PETSC_TRUE; 6024 } 6025 } else { 6026 pcbddc->local_primal_size_cc += 1; 6027 } 6028 } 6029 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6030 pcbddc->n_vertices = total_primal_vertices; 6031 /* permute indices in order to have a sorted set of vertices */ 6032 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6033 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6034 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6035 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6036 6037 /* nonzero structure of constraint matrix */ 6038 /* and get reference dof for local constraints */ 6039 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6040 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6041 6042 j = total_primal_vertices; 6043 total_counts = total_primal_vertices; 6044 cum = total_primal_vertices; 6045 for (i=n_vertices;i<total_counts_cc;i++) { 6046 if (!PetscBTLookup(change_basis,i)) { 6047 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6048 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6049 cum++; 6050 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6051 for (k=0;k<constraints_n[i];k++) { 6052 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6053 nnz[j+k] = size_of_constraint; 6054 } 6055 j += constraints_n[i]; 6056 } 6057 } 6058 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6059 ierr = PetscFree(nnz);CHKERRQ(ierr); 6060 6061 /* set values in constraint matrix */ 6062 for (i=0;i<total_primal_vertices;i++) { 6063 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6064 } 6065 total_counts = total_primal_vertices; 6066 for (i=n_vertices;i<total_counts_cc;i++) { 6067 if (!PetscBTLookup(change_basis,i)) { 6068 PetscInt *cols; 6069 6070 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6071 cols = constraints_idxs+constraints_idxs_ptr[i]; 6072 for (k=0;k<constraints_n[i];k++) { 6073 PetscInt row = total_counts+k; 6074 PetscScalar *vals; 6075 6076 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6077 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6078 } 6079 total_counts += constraints_n[i]; 6080 } 6081 } 6082 /* assembling */ 6083 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6084 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6085 6086 /* 6087 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6088 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6089 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6090 */ 6091 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6092 if (pcbddc->use_change_of_basis) { 6093 /* dual and primal dofs on a single cc */ 6094 PetscInt dual_dofs,primal_dofs; 6095 /* working stuff for GEQRF */ 6096 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6097 PetscBLASInt lqr_work; 6098 /* working stuff for UNGQR */ 6099 PetscScalar *gqr_work,lgqr_work_t; 6100 PetscBLASInt lgqr_work; 6101 /* working stuff for TRTRS */ 6102 PetscScalar *trs_rhs; 6103 PetscBLASInt Blas_NRHS; 6104 /* pointers for values insertion into change of basis matrix */ 6105 PetscInt *start_rows,*start_cols; 6106 PetscScalar *start_vals; 6107 /* working stuff for values insertion */ 6108 PetscBT is_primal; 6109 PetscInt *aux_primal_numbering_B; 6110 /* matrix sizes */ 6111 PetscInt global_size,local_size; 6112 /* temporary change of basis */ 6113 Mat localChangeOfBasisMatrix; 6114 /* extra space for debugging */ 6115 PetscScalar *dbg_work; 6116 6117 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6118 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6119 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6120 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6121 /* nonzeros for local mat */ 6122 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6123 if (!pcbddc->benign_change || pcbddc->fake_change) { 6124 for (i=0;i<pcis->n;i++) nnz[i]=1; 6125 } else { 6126 const PetscInt *ii; 6127 PetscInt n; 6128 PetscBool flg_row; 6129 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6130 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6131 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6132 } 6133 for (i=n_vertices;i<total_counts_cc;i++) { 6134 if (PetscBTLookup(change_basis,i)) { 6135 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6136 if (PetscBTLookup(qr_needed_idx,i)) { 6137 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6138 } else { 6139 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6140 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6141 } 6142 } 6143 } 6144 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6145 ierr = PetscFree(nnz);CHKERRQ(ierr); 6146 /* Set interior change in the matrix */ 6147 if (!pcbddc->benign_change || pcbddc->fake_change) { 6148 for (i=0;i<pcis->n;i++) { 6149 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6150 } 6151 } else { 6152 const PetscInt *ii,*jj; 6153 PetscScalar *aa; 6154 PetscInt n; 6155 PetscBool flg_row; 6156 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6157 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6158 for (i=0;i<n;i++) { 6159 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6160 } 6161 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6162 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6163 } 6164 6165 if (pcbddc->dbg_flag) { 6166 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6167 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6168 } 6169 6170 6171 /* Now we loop on the constraints which need a change of basis */ 6172 /* 6173 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6174 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6175 6176 Basic blocks of change of basis matrix T computed by 6177 6178 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6179 6180 | 1 0 ... 0 s_1/S | 6181 | 0 1 ... 0 s_2/S | 6182 | ... | 6183 | 0 ... 1 s_{n-1}/S | 6184 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6185 6186 with S = \sum_{i=1}^n s_i^2 6187 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6188 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6189 6190 - QR decomposition of constraints otherwise 6191 */ 6192 if (qr_needed) { 6193 /* space to store Q */ 6194 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6195 /* array to store scaling factors for reflectors */ 6196 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6197 /* first we issue queries for optimal work */ 6198 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6199 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6200 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6201 lqr_work = -1; 6202 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6203 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6204 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6205 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6206 lgqr_work = -1; 6207 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6208 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6209 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6210 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6211 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6212 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6213 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6214 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6215 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6216 /* array to store rhs and solution of triangular solver */ 6217 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6218 /* allocating workspace for check */ 6219 if (pcbddc->dbg_flag) { 6220 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6221 } 6222 } 6223 /* array to store whether a node is primal or not */ 6224 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6225 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6226 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6227 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6228 for (i=0;i<total_primal_vertices;i++) { 6229 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6230 } 6231 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6232 6233 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6234 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6235 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6236 if (PetscBTLookup(change_basis,total_counts)) { 6237 /* get constraint info */ 6238 primal_dofs = constraints_n[total_counts]; 6239 dual_dofs = size_of_constraint-primal_dofs; 6240 6241 if (pcbddc->dbg_flag) { 6242 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6243 } 6244 6245 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6246 6247 /* copy quadrature constraints for change of basis check */ 6248 if (pcbddc->dbg_flag) { 6249 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6250 } 6251 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6252 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6253 6254 /* compute QR decomposition of constraints */ 6255 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6256 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6257 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6258 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6259 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6260 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6261 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6262 6263 /* explictly compute R^-T */ 6264 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6265 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6266 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6267 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6268 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6269 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6270 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6271 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6272 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6273 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6274 6275 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6276 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6277 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6278 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6279 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6280 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6281 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6282 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6283 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6284 6285 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6286 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6287 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6288 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6289 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6290 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6291 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6292 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6293 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6294 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6295 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6296 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6297 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6298 6299 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6300 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6301 /* insert cols for primal dofs */ 6302 for (j=0;j<primal_dofs;j++) { 6303 start_vals = &qr_basis[j*size_of_constraint]; 6304 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6305 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6306 } 6307 /* insert cols for dual dofs */ 6308 for (j=0,k=0;j<dual_dofs;k++) { 6309 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6310 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6311 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6312 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6313 j++; 6314 } 6315 } 6316 6317 /* check change of basis */ 6318 if (pcbddc->dbg_flag) { 6319 PetscInt ii,jj; 6320 PetscBool valid_qr=PETSC_TRUE; 6321 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6322 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6323 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6324 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6325 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6326 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6327 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6328 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6329 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6330 for (jj=0;jj<size_of_constraint;jj++) { 6331 for (ii=0;ii<primal_dofs;ii++) { 6332 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6333 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6334 } 6335 } 6336 if (!valid_qr) { 6337 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6338 for (jj=0;jj<size_of_constraint;jj++) { 6339 for (ii=0;ii<primal_dofs;ii++) { 6340 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6341 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6342 } 6343 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6344 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6345 } 6346 } 6347 } 6348 } else { 6349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6350 } 6351 } 6352 } else { /* simple transformation block */ 6353 PetscInt row,col; 6354 PetscScalar val,norm; 6355 6356 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6357 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6358 for (j=0;j<size_of_constraint;j++) { 6359 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6360 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6361 if (!PetscBTLookup(is_primal,row_B)) { 6362 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6363 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6364 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6365 } else { 6366 for (k=0;k<size_of_constraint;k++) { 6367 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6368 if (row != col) { 6369 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6370 } else { 6371 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6372 } 6373 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6374 } 6375 } 6376 } 6377 if (pcbddc->dbg_flag) { 6378 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6379 } 6380 } 6381 } else { 6382 if (pcbddc->dbg_flag) { 6383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6384 } 6385 } 6386 } 6387 6388 /* free workspace */ 6389 if (qr_needed) { 6390 if (pcbddc->dbg_flag) { 6391 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6392 } 6393 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6394 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6395 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6396 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6397 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6398 } 6399 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6400 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6401 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6402 6403 /* assembling of global change of variable */ 6404 if (!pcbddc->fake_change) { 6405 Mat tmat; 6406 PetscInt bs; 6407 6408 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6409 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6410 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6411 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6412 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6413 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6414 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6415 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6416 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6417 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6418 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6419 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6420 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6421 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6422 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6423 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6424 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6425 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6426 6427 /* check */ 6428 if (pcbddc->dbg_flag) { 6429 PetscReal error; 6430 Vec x,x_change; 6431 6432 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6433 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6434 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6435 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6436 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6437 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6438 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6439 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6440 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6441 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6442 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6443 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6444 if (error > PETSC_SMALL) { 6445 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6446 } 6447 ierr = VecDestroy(&x);CHKERRQ(ierr); 6448 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6449 } 6450 /* adapt sub_schurs computed (if any) */ 6451 if (pcbddc->use_deluxe_scaling) { 6452 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6453 6454 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6455 if (sub_schurs && sub_schurs->S_Ej_all) { 6456 Mat S_new,tmat; 6457 IS is_all_N,is_V_Sall = NULL; 6458 6459 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6460 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6461 if (pcbddc->deluxe_zerorows) { 6462 ISLocalToGlobalMapping NtoSall; 6463 IS is_V; 6464 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6465 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6466 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6467 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6468 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6469 } 6470 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6471 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6472 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6473 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6474 if (pcbddc->deluxe_zerorows) { 6475 const PetscScalar *array; 6476 const PetscInt *idxs_V,*idxs_all; 6477 PetscInt i,n_V; 6478 6479 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6480 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6481 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6482 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6483 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6484 for (i=0;i<n_V;i++) { 6485 PetscScalar val; 6486 PetscInt idx; 6487 6488 idx = idxs_V[i]; 6489 val = array[idxs_all[idxs_V[i]]]; 6490 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6491 } 6492 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6493 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6494 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6495 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6496 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6497 } 6498 sub_schurs->S_Ej_all = S_new; 6499 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6500 if (sub_schurs->sum_S_Ej_all) { 6501 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6502 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6503 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6504 if (pcbddc->deluxe_zerorows) { 6505 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6506 } 6507 sub_schurs->sum_S_Ej_all = S_new; 6508 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6509 } 6510 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6511 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6512 } 6513 /* destroy any change of basis context in sub_schurs */ 6514 if (sub_schurs && sub_schurs->change) { 6515 PetscInt i; 6516 6517 for (i=0;i<sub_schurs->n_subs;i++) { 6518 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6519 } 6520 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6521 } 6522 } 6523 if (pcbddc->switch_static) { /* need to save the local change */ 6524 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6525 } else { 6526 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6527 } 6528 /* determine if any process has changed the pressures locally */ 6529 pcbddc->change_interior = pcbddc->benign_have_null; 6530 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6531 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6532 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6533 pcbddc->use_qr_single = qr_needed; 6534 } 6535 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6536 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6537 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6538 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6539 } else { 6540 Mat benign_global = NULL; 6541 if (pcbddc->benign_have_null) { 6542 Mat tmat; 6543 6544 pcbddc->change_interior = PETSC_TRUE; 6545 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6546 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6547 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6548 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6549 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6550 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6551 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6552 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6553 if (pcbddc->benign_change) { 6554 Mat M; 6555 6556 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6557 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6558 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6559 ierr = MatDestroy(&M);CHKERRQ(ierr); 6560 } else { 6561 Mat eye; 6562 PetscScalar *array; 6563 6564 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6565 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6566 for (i=0;i<pcis->n;i++) { 6567 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6568 } 6569 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6570 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6571 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6572 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6573 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6574 } 6575 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6576 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6577 } 6578 if (pcbddc->user_ChangeOfBasisMatrix) { 6579 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6580 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6581 } else if (pcbddc->benign_have_null) { 6582 pcbddc->ChangeOfBasisMatrix = benign_global; 6583 } 6584 } 6585 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6586 IS is_global; 6587 const PetscInt *gidxs; 6588 6589 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6590 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6591 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6592 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6593 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6594 } 6595 } 6596 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6597 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6598 } 6599 6600 if (!pcbddc->fake_change) { 6601 /* add pressure dofs to set of primal nodes for numbering purposes */ 6602 for (i=0;i<pcbddc->benign_n;i++) { 6603 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6604 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6605 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6606 pcbddc->local_primal_size_cc++; 6607 pcbddc->local_primal_size++; 6608 } 6609 6610 /* check if a new primal space has been introduced (also take into account benign trick) */ 6611 pcbddc->new_primal_space_local = PETSC_TRUE; 6612 if (olocal_primal_size == pcbddc->local_primal_size) { 6613 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6614 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6615 if (!pcbddc->new_primal_space_local) { 6616 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6617 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6618 } 6619 } 6620 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6621 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6622 } 6623 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6624 6625 /* flush dbg viewer */ 6626 if (pcbddc->dbg_flag) { 6627 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6628 } 6629 6630 /* free workspace */ 6631 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6632 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6633 if (!pcbddc->adaptive_selection) { 6634 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6635 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6636 } else { 6637 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6638 pcbddc->adaptive_constraints_idxs_ptr, 6639 pcbddc->adaptive_constraints_data_ptr, 6640 pcbddc->adaptive_constraints_idxs, 6641 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6642 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6643 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6644 } 6645 PetscFunctionReturn(0); 6646 } 6647 6648 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6649 { 6650 ISLocalToGlobalMapping map; 6651 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6652 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6653 PetscInt i,N; 6654 PetscBool rcsr = PETSC_FALSE; 6655 PetscErrorCode ierr; 6656 6657 PetscFunctionBegin; 6658 if (pcbddc->recompute_topography) { 6659 pcbddc->graphanalyzed = PETSC_FALSE; 6660 /* Reset previously computed graph */ 6661 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6662 /* Init local Graph struct */ 6663 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6664 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6665 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6666 6667 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6668 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6669 } 6670 /* Check validity of the csr graph passed in by the user */ 6671 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6672 6673 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6674 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6675 PetscInt *xadj,*adjncy; 6676 PetscInt nvtxs; 6677 PetscBool flg_row=PETSC_FALSE; 6678 6679 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6680 if (flg_row) { 6681 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6682 pcbddc->computed_rowadj = PETSC_TRUE; 6683 } 6684 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6685 rcsr = PETSC_TRUE; 6686 } 6687 if (pcbddc->dbg_flag) { 6688 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6689 } 6690 6691 /* Setup of Graph */ 6692 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6693 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6694 6695 /* attach info on disconnected subdomains if present */ 6696 if (pcbddc->n_local_subs) { 6697 PetscInt *local_subs; 6698 6699 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6700 for (i=0;i<pcbddc->n_local_subs;i++) { 6701 const PetscInt *idxs; 6702 PetscInt nl,j; 6703 6704 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6705 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6706 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6707 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6708 } 6709 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6710 pcbddc->mat_graph->local_subs = local_subs; 6711 } 6712 } 6713 6714 if (!pcbddc->graphanalyzed) { 6715 /* Graph's connected components analysis */ 6716 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6717 pcbddc->graphanalyzed = PETSC_TRUE; 6718 } 6719 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6720 PetscFunctionReturn(0); 6721 } 6722 6723 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6724 { 6725 PetscInt i,j; 6726 PetscScalar *alphas; 6727 PetscErrorCode ierr; 6728 6729 PetscFunctionBegin; 6730 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6731 for (i=0;i<n;i++) { 6732 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6733 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6734 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6735 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6736 } 6737 ierr = PetscFree(alphas);CHKERRQ(ierr); 6738 PetscFunctionReturn(0); 6739 } 6740 6741 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6742 { 6743 Mat A; 6744 PetscInt n_neighs,*neighs,*n_shared,**shared; 6745 PetscMPIInt size,rank,color; 6746 PetscInt *xadj,*adjncy; 6747 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6748 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6749 PetscInt void_procs,*procs_candidates = NULL; 6750 PetscInt xadj_count,*count; 6751 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6752 PetscSubcomm psubcomm; 6753 MPI_Comm subcomm; 6754 PetscErrorCode ierr; 6755 6756 PetscFunctionBegin; 6757 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6758 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6759 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6760 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6761 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6762 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6763 6764 if (have_void) *have_void = PETSC_FALSE; 6765 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6766 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6767 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6768 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6769 im_active = !!n; 6770 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6771 void_procs = size - active_procs; 6772 /* get ranks of of non-active processes in mat communicator */ 6773 if (void_procs) { 6774 PetscInt ncand; 6775 6776 if (have_void) *have_void = PETSC_TRUE; 6777 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6778 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6779 for (i=0,ncand=0;i<size;i++) { 6780 if (!procs_candidates[i]) { 6781 procs_candidates[ncand++] = i; 6782 } 6783 } 6784 /* force n_subdomains to be not greater that the number of non-active processes */ 6785 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6786 } 6787 6788 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6789 number of subdomains requested 1 -> send to master or first candidate in voids */ 6790 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6791 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6792 PetscInt issize,isidx,dest; 6793 if (*n_subdomains == 1) dest = 0; 6794 else dest = rank; 6795 if (im_active) { 6796 issize = 1; 6797 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6798 isidx = procs_candidates[dest]; 6799 } else { 6800 isidx = dest; 6801 } 6802 } else { 6803 issize = 0; 6804 isidx = -1; 6805 } 6806 if (*n_subdomains != 1) *n_subdomains = active_procs; 6807 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6808 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6809 PetscFunctionReturn(0); 6810 } 6811 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6812 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6813 threshold = PetscMax(threshold,2); 6814 6815 /* Get info on mapping */ 6816 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6817 6818 /* build local CSR graph of subdomains' connectivity */ 6819 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6820 xadj[0] = 0; 6821 xadj[1] = PetscMax(n_neighs-1,0); 6822 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6823 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6824 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6825 for (i=1;i<n_neighs;i++) 6826 for (j=0;j<n_shared[i];j++) 6827 count[shared[i][j]] += 1; 6828 6829 xadj_count = 0; 6830 for (i=1;i<n_neighs;i++) { 6831 for (j=0;j<n_shared[i];j++) { 6832 if (count[shared[i][j]] < threshold) { 6833 adjncy[xadj_count] = neighs[i]; 6834 adjncy_wgt[xadj_count] = n_shared[i]; 6835 xadj_count++; 6836 break; 6837 } 6838 } 6839 } 6840 xadj[1] = xadj_count; 6841 ierr = PetscFree(count);CHKERRQ(ierr); 6842 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6843 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6844 6845 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6846 6847 /* Restrict work on active processes only */ 6848 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6849 if (void_procs) { 6850 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6851 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6852 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6853 subcomm = PetscSubcommChild(psubcomm); 6854 } else { 6855 psubcomm = NULL; 6856 subcomm = PetscObjectComm((PetscObject)mat); 6857 } 6858 6859 v_wgt = NULL; 6860 if (!color) { 6861 ierr = PetscFree(xadj);CHKERRQ(ierr); 6862 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6863 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6864 } else { 6865 Mat subdomain_adj; 6866 IS new_ranks,new_ranks_contig; 6867 MatPartitioning partitioner; 6868 PetscInt rstart=0,rend=0; 6869 PetscInt *is_indices,*oldranks; 6870 PetscMPIInt size; 6871 PetscBool aggregate; 6872 6873 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6874 if (void_procs) { 6875 PetscInt prank = rank; 6876 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6877 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6878 for (i=0;i<xadj[1];i++) { 6879 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6880 } 6881 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6882 } else { 6883 oldranks = NULL; 6884 } 6885 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6886 if (aggregate) { /* TODO: all this part could be made more efficient */ 6887 PetscInt lrows,row,ncols,*cols; 6888 PetscMPIInt nrank; 6889 PetscScalar *vals; 6890 6891 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6892 lrows = 0; 6893 if (nrank<redprocs) { 6894 lrows = size/redprocs; 6895 if (nrank<size%redprocs) lrows++; 6896 } 6897 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6898 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6899 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6900 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6901 row = nrank; 6902 ncols = xadj[1]-xadj[0]; 6903 cols = adjncy; 6904 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6905 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6906 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6907 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6908 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6909 ierr = PetscFree(xadj);CHKERRQ(ierr); 6910 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6911 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6912 ierr = PetscFree(vals);CHKERRQ(ierr); 6913 if (use_vwgt) { 6914 Vec v; 6915 const PetscScalar *array; 6916 PetscInt nl; 6917 6918 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6919 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6920 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6921 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6922 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6923 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6924 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6925 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6926 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6927 ierr = VecDestroy(&v);CHKERRQ(ierr); 6928 } 6929 } else { 6930 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6931 if (use_vwgt) { 6932 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6933 v_wgt[0] = n; 6934 } 6935 } 6936 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6937 6938 /* Partition */ 6939 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6940 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6941 if (v_wgt) { 6942 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6943 } 6944 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6945 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6946 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6947 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6948 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6949 6950 /* renumber new_ranks to avoid "holes" in new set of processors */ 6951 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6952 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6953 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6954 if (!aggregate) { 6955 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6956 #if defined(PETSC_USE_DEBUG) 6957 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6958 #endif 6959 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6960 } else if (oldranks) { 6961 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6962 } else { 6963 ranks_send_to_idx[0] = is_indices[0]; 6964 } 6965 } else { 6966 PetscInt idx = 0; 6967 PetscMPIInt tag; 6968 MPI_Request *reqs; 6969 6970 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6971 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6972 for (i=rstart;i<rend;i++) { 6973 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6974 } 6975 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6976 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6977 ierr = PetscFree(reqs);CHKERRQ(ierr); 6978 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6979 #if defined(PETSC_USE_DEBUG) 6980 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6981 #endif 6982 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6983 } else if (oldranks) { 6984 ranks_send_to_idx[0] = oldranks[idx]; 6985 } else { 6986 ranks_send_to_idx[0] = idx; 6987 } 6988 } 6989 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6990 /* clean up */ 6991 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6992 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6993 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6994 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 6995 } 6996 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 6997 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6998 6999 /* assemble parallel IS for sends */ 7000 i = 1; 7001 if (!color) i=0; 7002 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7003 PetscFunctionReturn(0); 7004 } 7005 7006 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7007 7008 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7009 { 7010 Mat local_mat; 7011 IS is_sends_internal; 7012 PetscInt rows,cols,new_local_rows; 7013 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7014 PetscBool ismatis,isdense,newisdense,destroy_mat; 7015 ISLocalToGlobalMapping l2gmap; 7016 PetscInt* l2gmap_indices; 7017 const PetscInt* is_indices; 7018 MatType new_local_type; 7019 /* buffers */ 7020 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7021 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7022 PetscInt *recv_buffer_idxs_local; 7023 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7024 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7025 /* MPI */ 7026 MPI_Comm comm,comm_n; 7027 PetscSubcomm subcomm; 7028 PetscMPIInt n_sends,n_recvs,commsize; 7029 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7030 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7031 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7032 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7033 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7034 PetscErrorCode ierr; 7035 7036 PetscFunctionBegin; 7037 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7038 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7039 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7040 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7041 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7042 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7043 PetscValidLogicalCollectiveBool(mat,reuse,6); 7044 PetscValidLogicalCollectiveInt(mat,nis,8); 7045 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7046 if (nvecs) { 7047 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7048 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7049 } 7050 /* further checks */ 7051 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7052 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7053 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7054 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7055 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7056 if (reuse && *mat_n) { 7057 PetscInt mrows,mcols,mnrows,mncols; 7058 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7059 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7060 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7061 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7062 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7063 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7064 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7065 } 7066 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7067 PetscValidLogicalCollectiveInt(mat,bs,0); 7068 7069 /* prepare IS for sending if not provided */ 7070 if (!is_sends) { 7071 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7072 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7073 } else { 7074 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7075 is_sends_internal = is_sends; 7076 } 7077 7078 /* get comm */ 7079 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7080 7081 /* compute number of sends */ 7082 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7083 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7084 7085 /* compute number of receives */ 7086 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7087 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7088 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7089 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7090 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7091 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7092 ierr = PetscFree(iflags);CHKERRQ(ierr); 7093 7094 /* restrict comm if requested */ 7095 subcomm = 0; 7096 destroy_mat = PETSC_FALSE; 7097 if (restrict_comm) { 7098 PetscMPIInt color,subcommsize; 7099 7100 color = 0; 7101 if (restrict_full) { 7102 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7103 } else { 7104 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7105 } 7106 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7107 subcommsize = commsize - subcommsize; 7108 /* check if reuse has been requested */ 7109 if (reuse) { 7110 if (*mat_n) { 7111 PetscMPIInt subcommsize2; 7112 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7113 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7114 comm_n = PetscObjectComm((PetscObject)*mat_n); 7115 } else { 7116 comm_n = PETSC_COMM_SELF; 7117 } 7118 } else { /* MAT_INITIAL_MATRIX */ 7119 PetscMPIInt rank; 7120 7121 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7122 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7123 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7124 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7125 comm_n = PetscSubcommChild(subcomm); 7126 } 7127 /* flag to destroy *mat_n if not significative */ 7128 if (color) destroy_mat = PETSC_TRUE; 7129 } else { 7130 comm_n = comm; 7131 } 7132 7133 /* prepare send/receive buffers */ 7134 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7135 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7136 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7137 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7138 if (nis) { 7139 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7140 } 7141 7142 /* Get data from local matrices */ 7143 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7144 /* TODO: See below some guidelines on how to prepare the local buffers */ 7145 /* 7146 send_buffer_vals should contain the raw values of the local matrix 7147 send_buffer_idxs should contain: 7148 - MatType_PRIVATE type 7149 - PetscInt size_of_l2gmap 7150 - PetscInt global_row_indices[size_of_l2gmap] 7151 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7152 */ 7153 else { 7154 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7155 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7156 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7157 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7158 send_buffer_idxs[1] = i; 7159 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7160 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7161 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7162 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7163 for (i=0;i<n_sends;i++) { 7164 ilengths_vals[is_indices[i]] = len*len; 7165 ilengths_idxs[is_indices[i]] = len+2; 7166 } 7167 } 7168 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7169 /* additional is (if any) */ 7170 if (nis) { 7171 PetscMPIInt psum; 7172 PetscInt j; 7173 for (j=0,psum=0;j<nis;j++) { 7174 PetscInt plen; 7175 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7176 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7177 psum += len+1; /* indices + lenght */ 7178 } 7179 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7180 for (j=0,psum=0;j<nis;j++) { 7181 PetscInt plen; 7182 const PetscInt *is_array_idxs; 7183 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7184 send_buffer_idxs_is[psum] = plen; 7185 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7186 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7187 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7188 psum += plen+1; /* indices + lenght */ 7189 } 7190 for (i=0;i<n_sends;i++) { 7191 ilengths_idxs_is[is_indices[i]] = psum; 7192 } 7193 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7194 } 7195 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7196 7197 buf_size_idxs = 0; 7198 buf_size_vals = 0; 7199 buf_size_idxs_is = 0; 7200 buf_size_vecs = 0; 7201 for (i=0;i<n_recvs;i++) { 7202 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7203 buf_size_vals += (PetscInt)olengths_vals[i]; 7204 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7205 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7206 } 7207 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7208 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7209 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7210 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7211 7212 /* get new tags for clean communications */ 7213 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7214 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7215 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7216 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7217 7218 /* allocate for requests */ 7219 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7220 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7221 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7222 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7223 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7224 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7225 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7226 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7227 7228 /* communications */ 7229 ptr_idxs = recv_buffer_idxs; 7230 ptr_vals = recv_buffer_vals; 7231 ptr_idxs_is = recv_buffer_idxs_is; 7232 ptr_vecs = recv_buffer_vecs; 7233 for (i=0;i<n_recvs;i++) { 7234 source_dest = onodes[i]; 7235 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7236 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7237 ptr_idxs += olengths_idxs[i]; 7238 ptr_vals += olengths_vals[i]; 7239 if (nis) { 7240 source_dest = onodes_is[i]; 7241 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7242 ptr_idxs_is += olengths_idxs_is[i]; 7243 } 7244 if (nvecs) { 7245 source_dest = onodes[i]; 7246 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7247 ptr_vecs += olengths_idxs[i]-2; 7248 } 7249 } 7250 for (i=0;i<n_sends;i++) { 7251 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7252 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7253 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7254 if (nis) { 7255 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7256 } 7257 if (nvecs) { 7258 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7259 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7260 } 7261 } 7262 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7263 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7264 7265 /* assemble new l2g map */ 7266 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7267 ptr_idxs = recv_buffer_idxs; 7268 new_local_rows = 0; 7269 for (i=0;i<n_recvs;i++) { 7270 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7271 ptr_idxs += olengths_idxs[i]; 7272 } 7273 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7274 ptr_idxs = recv_buffer_idxs; 7275 new_local_rows = 0; 7276 for (i=0;i<n_recvs;i++) { 7277 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7278 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7279 ptr_idxs += olengths_idxs[i]; 7280 } 7281 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7282 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7283 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7284 7285 /* infer new local matrix type from received local matrices type */ 7286 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7287 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7288 if (n_recvs) { 7289 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7290 ptr_idxs = recv_buffer_idxs; 7291 for (i=0;i<n_recvs;i++) { 7292 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7293 new_local_type_private = MATAIJ_PRIVATE; 7294 break; 7295 } 7296 ptr_idxs += olengths_idxs[i]; 7297 } 7298 switch (new_local_type_private) { 7299 case MATDENSE_PRIVATE: 7300 new_local_type = MATSEQAIJ; 7301 bs = 1; 7302 break; 7303 case MATAIJ_PRIVATE: 7304 new_local_type = MATSEQAIJ; 7305 bs = 1; 7306 break; 7307 case MATBAIJ_PRIVATE: 7308 new_local_type = MATSEQBAIJ; 7309 break; 7310 case MATSBAIJ_PRIVATE: 7311 new_local_type = MATSEQSBAIJ; 7312 break; 7313 default: 7314 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7315 break; 7316 } 7317 } else { /* by default, new_local_type is seqaij */ 7318 new_local_type = MATSEQAIJ; 7319 bs = 1; 7320 } 7321 7322 /* create MATIS object if needed */ 7323 if (!reuse) { 7324 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7325 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7326 } else { 7327 /* it also destroys the local matrices */ 7328 if (*mat_n) { 7329 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7330 } else { /* this is a fake object */ 7331 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7332 } 7333 } 7334 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7335 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7336 7337 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7338 7339 /* Global to local map of received indices */ 7340 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7341 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7342 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7343 7344 /* restore attributes -> type of incoming data and its size */ 7345 buf_size_idxs = 0; 7346 for (i=0;i<n_recvs;i++) { 7347 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7348 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7349 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7350 } 7351 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7352 7353 /* set preallocation */ 7354 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7355 if (!newisdense) { 7356 PetscInt *new_local_nnz=0; 7357 7358 ptr_idxs = recv_buffer_idxs_local; 7359 if (n_recvs) { 7360 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7361 } 7362 for (i=0;i<n_recvs;i++) { 7363 PetscInt j; 7364 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7365 for (j=0;j<*(ptr_idxs+1);j++) { 7366 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7367 } 7368 } else { 7369 /* TODO */ 7370 } 7371 ptr_idxs += olengths_idxs[i]; 7372 } 7373 if (new_local_nnz) { 7374 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7375 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7376 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7377 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7378 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7379 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7380 } else { 7381 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7382 } 7383 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7384 } else { 7385 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7386 } 7387 7388 /* set values */ 7389 ptr_vals = recv_buffer_vals; 7390 ptr_idxs = recv_buffer_idxs_local; 7391 for (i=0;i<n_recvs;i++) { 7392 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7393 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7394 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7395 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7396 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7397 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7398 } else { 7399 /* TODO */ 7400 } 7401 ptr_idxs += olengths_idxs[i]; 7402 ptr_vals += olengths_vals[i]; 7403 } 7404 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7405 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7406 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7407 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7408 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7409 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7410 7411 #if 0 7412 if (!restrict_comm) { /* check */ 7413 Vec lvec,rvec; 7414 PetscReal infty_error; 7415 7416 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7417 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7418 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7419 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7420 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7421 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7422 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7423 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7424 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7425 } 7426 #endif 7427 7428 /* assemble new additional is (if any) */ 7429 if (nis) { 7430 PetscInt **temp_idxs,*count_is,j,psum; 7431 7432 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7433 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7434 ptr_idxs = recv_buffer_idxs_is; 7435 psum = 0; 7436 for (i=0;i<n_recvs;i++) { 7437 for (j=0;j<nis;j++) { 7438 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7439 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7440 psum += plen; 7441 ptr_idxs += plen+1; /* shift pointer to received data */ 7442 } 7443 } 7444 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7445 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7446 for (i=1;i<nis;i++) { 7447 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7448 } 7449 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7450 ptr_idxs = recv_buffer_idxs_is; 7451 for (i=0;i<n_recvs;i++) { 7452 for (j=0;j<nis;j++) { 7453 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7454 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7455 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7456 ptr_idxs += plen+1; /* shift pointer to received data */ 7457 } 7458 } 7459 for (i=0;i<nis;i++) { 7460 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7461 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7462 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7463 } 7464 ierr = PetscFree(count_is);CHKERRQ(ierr); 7465 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7466 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7467 } 7468 /* free workspace */ 7469 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7470 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7471 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7472 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7473 if (isdense) { 7474 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7475 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7476 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7477 } else { 7478 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7479 } 7480 if (nis) { 7481 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7482 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7483 } 7484 7485 if (nvecs) { 7486 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7487 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7488 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7489 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7490 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7491 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7492 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7493 /* set values */ 7494 ptr_vals = recv_buffer_vecs; 7495 ptr_idxs = recv_buffer_idxs_local; 7496 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7497 for (i=0;i<n_recvs;i++) { 7498 PetscInt j; 7499 for (j=0;j<*(ptr_idxs+1);j++) { 7500 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7501 } 7502 ptr_idxs += olengths_idxs[i]; 7503 ptr_vals += olengths_idxs[i]-2; 7504 } 7505 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7506 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7507 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7508 } 7509 7510 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7511 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7512 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7513 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7514 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7515 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7516 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7517 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7518 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7519 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7520 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7521 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7522 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7523 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7524 ierr = PetscFree(onodes);CHKERRQ(ierr); 7525 if (nis) { 7526 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7527 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7528 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7529 } 7530 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7531 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7532 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7533 for (i=0;i<nis;i++) { 7534 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7535 } 7536 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7537 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7538 } 7539 *mat_n = NULL; 7540 } 7541 PetscFunctionReturn(0); 7542 } 7543 7544 /* temporary hack into ksp private data structure */ 7545 #include <petsc/private/kspimpl.h> 7546 7547 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7548 { 7549 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7550 PC_IS *pcis = (PC_IS*)pc->data; 7551 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7552 Mat coarsedivudotp = NULL; 7553 Mat coarseG,t_coarse_mat_is; 7554 MatNullSpace CoarseNullSpace = NULL; 7555 ISLocalToGlobalMapping coarse_islg; 7556 IS coarse_is,*isarray; 7557 PetscInt i,im_active=-1,active_procs=-1; 7558 PetscInt nis,nisdofs,nisneu,nisvert; 7559 PC pc_temp; 7560 PCType coarse_pc_type; 7561 KSPType coarse_ksp_type; 7562 PetscBool multilevel_requested,multilevel_allowed; 7563 PetscBool coarse_reuse; 7564 PetscInt ncoarse,nedcfield; 7565 PetscBool compute_vecs = PETSC_FALSE; 7566 PetscScalar *array; 7567 MatReuse coarse_mat_reuse; 7568 PetscBool restr, full_restr, have_void; 7569 PetscMPIInt commsize; 7570 PetscErrorCode ierr; 7571 7572 PetscFunctionBegin; 7573 /* Assign global numbering to coarse dofs */ 7574 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7575 PetscInt ocoarse_size; 7576 compute_vecs = PETSC_TRUE; 7577 7578 pcbddc->new_primal_space = PETSC_TRUE; 7579 ocoarse_size = pcbddc->coarse_size; 7580 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7581 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7582 /* see if we can avoid some work */ 7583 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7584 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7585 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7586 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7587 coarse_reuse = PETSC_FALSE; 7588 } else { /* we can safely reuse already computed coarse matrix */ 7589 coarse_reuse = PETSC_TRUE; 7590 } 7591 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7592 coarse_reuse = PETSC_FALSE; 7593 } 7594 /* reset any subassembling information */ 7595 if (!coarse_reuse || pcbddc->recompute_topography) { 7596 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7597 } 7598 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7599 coarse_reuse = PETSC_TRUE; 7600 } 7601 /* assemble coarse matrix */ 7602 if (coarse_reuse && pcbddc->coarse_ksp) { 7603 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7604 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7605 coarse_mat_reuse = MAT_REUSE_MATRIX; 7606 } else { 7607 coarse_mat = NULL; 7608 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7609 } 7610 7611 /* creates temporary l2gmap and IS for coarse indexes */ 7612 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7613 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7614 7615 /* creates temporary MATIS object for coarse matrix */ 7616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7617 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7618 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7619 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7620 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7621 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7622 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7623 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7624 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7625 7626 /* count "active" (i.e. with positive local size) and "void" processes */ 7627 im_active = !!(pcis->n); 7628 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7629 7630 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7631 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7632 /* full_restr : just use the receivers from the subassembling pattern */ 7633 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7634 coarse_mat_is = NULL; 7635 multilevel_allowed = PETSC_FALSE; 7636 multilevel_requested = PETSC_FALSE; 7637 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7638 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7639 if (multilevel_requested) { 7640 ncoarse = active_procs/pcbddc->coarsening_ratio; 7641 restr = PETSC_FALSE; 7642 full_restr = PETSC_FALSE; 7643 } else { 7644 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7645 restr = PETSC_TRUE; 7646 full_restr = PETSC_TRUE; 7647 } 7648 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7649 ncoarse = PetscMax(1,ncoarse); 7650 if (!pcbddc->coarse_subassembling) { 7651 if (pcbddc->coarsening_ratio > 1) { 7652 if (multilevel_requested) { 7653 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7654 } else { 7655 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7656 } 7657 } else { 7658 PetscMPIInt rank; 7659 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7660 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7661 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7662 } 7663 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7664 PetscInt psum; 7665 if (pcbddc->coarse_ksp) psum = 1; 7666 else psum = 0; 7667 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7668 if (ncoarse < commsize) have_void = PETSC_TRUE; 7669 } 7670 /* determine if we can go multilevel */ 7671 if (multilevel_requested) { 7672 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7673 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7674 } 7675 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7676 7677 /* dump subassembling pattern */ 7678 if (pcbddc->dbg_flag && multilevel_allowed) { 7679 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7680 } 7681 7682 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7683 nedcfield = -1; 7684 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7685 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7686 const PetscInt *idxs; 7687 ISLocalToGlobalMapping tmap; 7688 7689 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7690 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7691 /* allocate space for temporary storage */ 7692 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7693 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7694 /* allocate for IS array */ 7695 nisdofs = pcbddc->n_ISForDofsLocal; 7696 if (pcbddc->nedclocal) { 7697 if (pcbddc->nedfield > -1) { 7698 nedcfield = pcbddc->nedfield; 7699 } else { 7700 nedcfield = 0; 7701 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7702 nisdofs = 1; 7703 } 7704 } 7705 nisneu = !!pcbddc->NeumannBoundariesLocal; 7706 nisvert = 0; /* nisvert is not used */ 7707 nis = nisdofs + nisneu + nisvert; 7708 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7709 /* dofs splitting */ 7710 for (i=0;i<nisdofs;i++) { 7711 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7712 if (nedcfield != i) { 7713 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7714 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7715 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7716 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7717 } else { 7718 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7719 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7720 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7721 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7722 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7723 } 7724 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7725 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7726 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7727 } 7728 /* neumann boundaries */ 7729 if (pcbddc->NeumannBoundariesLocal) { 7730 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7731 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7732 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7733 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7734 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7735 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7736 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7737 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7738 } 7739 /* free memory */ 7740 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7741 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7742 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7743 } else { 7744 nis = 0; 7745 nisdofs = 0; 7746 nisneu = 0; 7747 nisvert = 0; 7748 isarray = NULL; 7749 } 7750 /* destroy no longer needed map */ 7751 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7752 7753 /* subassemble */ 7754 if (multilevel_allowed) { 7755 Vec vp[1]; 7756 PetscInt nvecs = 0; 7757 PetscBool reuse,reuser; 7758 7759 if (coarse_mat) reuse = PETSC_TRUE; 7760 else reuse = PETSC_FALSE; 7761 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7762 vp[0] = NULL; 7763 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7764 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7765 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7766 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7767 nvecs = 1; 7768 7769 if (pcbddc->divudotp) { 7770 Mat B,loc_divudotp; 7771 Vec v,p; 7772 IS dummy; 7773 PetscInt np; 7774 7775 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7776 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7777 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7778 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7779 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7780 ierr = VecSet(p,1.);CHKERRQ(ierr); 7781 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7782 ierr = VecDestroy(&p);CHKERRQ(ierr); 7783 ierr = MatDestroy(&B);CHKERRQ(ierr); 7784 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7785 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7786 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7787 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7788 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7789 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7790 ierr = VecDestroy(&v);CHKERRQ(ierr); 7791 } 7792 } 7793 if (reuser) { 7794 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7795 } else { 7796 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7797 } 7798 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7799 PetscScalar *arraym,*arrayv; 7800 PetscInt nl; 7801 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7802 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7803 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7804 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7805 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7806 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7807 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7808 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7809 } else { 7810 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7811 } 7812 } else { 7813 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7814 } 7815 if (coarse_mat_is || coarse_mat) { 7816 PetscMPIInt size; 7817 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7818 if (!multilevel_allowed) { 7819 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7820 } else { 7821 Mat A; 7822 7823 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7824 if (coarse_mat_is) { 7825 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7826 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7827 coarse_mat = coarse_mat_is; 7828 } 7829 /* be sure we don't have MatSeqDENSE as local mat */ 7830 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7831 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7832 } 7833 } 7834 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7835 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7836 7837 /* create local to global scatters for coarse problem */ 7838 if (compute_vecs) { 7839 PetscInt lrows; 7840 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7841 if (coarse_mat) { 7842 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7843 } else { 7844 lrows = 0; 7845 } 7846 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7847 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7848 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7849 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7850 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7851 } 7852 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7853 7854 /* set defaults for coarse KSP and PC */ 7855 if (multilevel_allowed) { 7856 coarse_ksp_type = KSPRICHARDSON; 7857 coarse_pc_type = PCBDDC; 7858 } else { 7859 coarse_ksp_type = KSPPREONLY; 7860 coarse_pc_type = PCREDUNDANT; 7861 } 7862 7863 /* print some info if requested */ 7864 if (pcbddc->dbg_flag) { 7865 if (!multilevel_allowed) { 7866 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7867 if (multilevel_requested) { 7868 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7869 } else if (pcbddc->max_levels) { 7870 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7871 } 7872 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7873 } 7874 } 7875 7876 /* communicate coarse discrete gradient */ 7877 coarseG = NULL; 7878 if (pcbddc->nedcG && multilevel_allowed) { 7879 MPI_Comm ccomm; 7880 if (coarse_mat) { 7881 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7882 } else { 7883 ccomm = MPI_COMM_NULL; 7884 } 7885 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7886 } 7887 7888 /* create the coarse KSP object only once with defaults */ 7889 if (coarse_mat) { 7890 PetscBool isredundant,isnn,isbddc; 7891 PetscViewer dbg_viewer = NULL; 7892 7893 if (pcbddc->dbg_flag) { 7894 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7895 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7896 } 7897 if (!pcbddc->coarse_ksp) { 7898 char prefix[256],str_level[16]; 7899 size_t len; 7900 7901 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7902 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7903 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7904 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7905 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7906 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7907 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7908 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7909 /* TODO is this logic correct? should check for coarse_mat type */ 7910 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7911 /* prefix */ 7912 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7913 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7914 if (!pcbddc->current_level) { 7915 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7916 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7917 } else { 7918 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7919 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7920 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7921 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7922 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7923 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7924 } 7925 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7926 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7927 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7928 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7929 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7930 /* allow user customization */ 7931 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7932 } 7933 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7934 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7935 if (nisdofs) { 7936 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7937 for (i=0;i<nisdofs;i++) { 7938 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7939 } 7940 } 7941 if (nisneu) { 7942 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7943 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7944 } 7945 if (nisvert) { 7946 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7947 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7948 } 7949 if (coarseG) { 7950 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7951 } 7952 7953 /* get some info after set from options */ 7954 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7955 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7956 if (isbddc && !multilevel_allowed) { 7957 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7958 isbddc = PETSC_FALSE; 7959 } 7960 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7961 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7962 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7963 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7964 isbddc = PETSC_TRUE; 7965 } 7966 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7967 if (isredundant) { 7968 KSP inner_ksp; 7969 PC inner_pc; 7970 7971 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7972 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7973 } 7974 7975 /* parameters which miss an API */ 7976 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7977 if (isbddc) { 7978 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7979 7980 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7981 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7982 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7983 if (pcbddc_coarse->benign_saddle_point) { 7984 Mat coarsedivudotp_is; 7985 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7986 IS row,col; 7987 const PetscInt *gidxs; 7988 PetscInt n,st,M,N; 7989 7990 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7991 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7992 st = st-n; 7993 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7994 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 7995 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 7996 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7997 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 7998 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 7999 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8000 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8001 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8002 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8003 ierr = ISDestroy(&row);CHKERRQ(ierr); 8004 ierr = ISDestroy(&col);CHKERRQ(ierr); 8005 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8006 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8007 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8008 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8009 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8010 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8011 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8012 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8013 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8014 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8015 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8016 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8017 } 8018 } 8019 8020 /* propagate symmetry info of coarse matrix */ 8021 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8022 if (pc->pmat->symmetric_set) { 8023 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8024 } 8025 if (pc->pmat->hermitian_set) { 8026 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8027 } 8028 if (pc->pmat->spd_set) { 8029 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8030 } 8031 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8032 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8033 } 8034 /* set operators */ 8035 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8036 if (pcbddc->dbg_flag) { 8037 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8038 } 8039 } 8040 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8041 ierr = PetscFree(isarray);CHKERRQ(ierr); 8042 #if 0 8043 { 8044 PetscViewer viewer; 8045 char filename[256]; 8046 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8047 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8048 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8049 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8050 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8051 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8052 } 8053 #endif 8054 8055 if (pcbddc->coarse_ksp) { 8056 Vec crhs,csol; 8057 8058 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8059 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8060 if (!csol) { 8061 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8062 } 8063 if (!crhs) { 8064 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8065 } 8066 } 8067 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8068 8069 /* compute null space for coarse solver if the benign trick has been requested */ 8070 if (pcbddc->benign_null) { 8071 8072 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8073 for (i=0;i<pcbddc->benign_n;i++) { 8074 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8075 } 8076 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8077 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8078 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8079 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8080 if (coarse_mat) { 8081 Vec nullv; 8082 PetscScalar *array,*array2; 8083 PetscInt nl; 8084 8085 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8086 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8087 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8088 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8089 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8090 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8091 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8092 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8093 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8094 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8095 } 8096 } 8097 8098 if (pcbddc->coarse_ksp) { 8099 PetscBool ispreonly; 8100 8101 if (CoarseNullSpace) { 8102 PetscBool isnull; 8103 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8104 if (isnull) { 8105 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8106 } 8107 /* TODO: add local nullspaces (if any) */ 8108 } 8109 /* setup coarse ksp */ 8110 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8111 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8112 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8113 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8114 KSP check_ksp; 8115 KSPType check_ksp_type; 8116 PC check_pc; 8117 Vec check_vec,coarse_vec; 8118 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8119 PetscInt its; 8120 PetscBool compute_eigs; 8121 PetscReal *eigs_r,*eigs_c; 8122 PetscInt neigs; 8123 const char *prefix; 8124 8125 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8126 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8127 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8128 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8129 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8130 /* prevent from setup unneeded object */ 8131 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8132 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8133 if (ispreonly) { 8134 check_ksp_type = KSPPREONLY; 8135 compute_eigs = PETSC_FALSE; 8136 } else { 8137 check_ksp_type = KSPGMRES; 8138 compute_eigs = PETSC_TRUE; 8139 } 8140 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8141 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8142 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8143 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8144 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8145 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8146 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8147 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8148 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8149 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8150 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8151 /* create random vec */ 8152 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8153 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8154 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8155 /* solve coarse problem */ 8156 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8157 /* set eigenvalue estimation if preonly has not been requested */ 8158 if (compute_eigs) { 8159 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8160 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8161 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8162 if (neigs) { 8163 lambda_max = eigs_r[neigs-1]; 8164 lambda_min = eigs_r[0]; 8165 if (pcbddc->use_coarse_estimates) { 8166 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8167 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8168 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8169 } 8170 } 8171 } 8172 } 8173 8174 /* check coarse problem residual error */ 8175 if (pcbddc->dbg_flag) { 8176 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8177 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8178 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8179 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8180 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8181 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8182 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8183 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8184 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8185 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8186 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8187 if (CoarseNullSpace) { 8188 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8189 } 8190 if (compute_eigs) { 8191 PetscReal lambda_max_s,lambda_min_s; 8192 KSPConvergedReason reason; 8193 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8194 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8195 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8196 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8197 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8198 for (i=0;i<neigs;i++) { 8199 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8200 } 8201 } 8202 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8203 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8204 } 8205 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8206 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8207 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8208 if (compute_eigs) { 8209 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8210 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8211 } 8212 } 8213 } 8214 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8215 /* print additional info */ 8216 if (pcbddc->dbg_flag) { 8217 /* waits until all processes reaches this point */ 8218 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8219 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8220 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8221 } 8222 8223 /* free memory */ 8224 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8225 PetscFunctionReturn(0); 8226 } 8227 8228 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8229 { 8230 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8231 PC_IS* pcis = (PC_IS*)pc->data; 8232 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8233 IS subset,subset_mult,subset_n; 8234 PetscInt local_size,coarse_size=0; 8235 PetscInt *local_primal_indices=NULL; 8236 const PetscInt *t_local_primal_indices; 8237 PetscErrorCode ierr; 8238 8239 PetscFunctionBegin; 8240 /* Compute global number of coarse dofs */ 8241 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8242 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8243 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8244 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8245 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8246 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8247 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8248 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8249 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8250 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8251 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8252 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8253 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8254 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8255 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8256 8257 /* check numbering */ 8258 if (pcbddc->dbg_flag) { 8259 PetscScalar coarsesum,*array,*array2; 8260 PetscInt i; 8261 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8262 8263 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8264 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8265 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8266 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8267 /* counter */ 8268 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8269 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8270 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8271 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8272 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8273 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8274 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8275 for (i=0;i<pcbddc->local_primal_size;i++) { 8276 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8277 } 8278 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8279 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8280 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8281 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8282 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8283 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8284 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8285 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8286 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8287 for (i=0;i<pcis->n;i++) { 8288 if (array[i] != 0.0 && array[i] != array2[i]) { 8289 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8290 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8291 set_error = PETSC_TRUE; 8292 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8294 } 8295 } 8296 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8297 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8298 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8299 for (i=0;i<pcis->n;i++) { 8300 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8301 } 8302 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8303 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8304 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8305 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8306 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8307 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8308 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8309 PetscInt *gidxs; 8310 8311 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8312 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8313 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8314 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8315 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8316 for (i=0;i<pcbddc->local_primal_size;i++) { 8317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8318 } 8319 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8320 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8321 } 8322 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8323 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8324 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8325 } 8326 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8327 /* get back data */ 8328 *coarse_size_n = coarse_size; 8329 *local_primal_indices_n = local_primal_indices; 8330 PetscFunctionReturn(0); 8331 } 8332 8333 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8334 { 8335 IS localis_t; 8336 PetscInt i,lsize,*idxs,n; 8337 PetscScalar *vals; 8338 PetscErrorCode ierr; 8339 8340 PetscFunctionBegin; 8341 /* get indices in local ordering exploiting local to global map */ 8342 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8343 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8344 for (i=0;i<lsize;i++) vals[i] = 1.0; 8345 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8346 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8347 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8348 if (idxs) { /* multilevel guard */ 8349 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8350 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8351 } 8352 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8353 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8354 ierr = PetscFree(vals);CHKERRQ(ierr); 8355 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8356 /* now compute set in local ordering */ 8357 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8358 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8359 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8360 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8361 for (i=0,lsize=0;i<n;i++) { 8362 if (PetscRealPart(vals[i]) > 0.5) { 8363 lsize++; 8364 } 8365 } 8366 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8367 for (i=0,lsize=0;i<n;i++) { 8368 if (PetscRealPart(vals[i]) > 0.5) { 8369 idxs[lsize++] = i; 8370 } 8371 } 8372 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8373 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8374 *localis = localis_t; 8375 PetscFunctionReturn(0); 8376 } 8377 8378 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8379 { 8380 PC_IS *pcis=(PC_IS*)pc->data; 8381 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8382 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8383 Mat S_j; 8384 PetscInt *used_xadj,*used_adjncy; 8385 PetscBool free_used_adj; 8386 PetscErrorCode ierr; 8387 8388 PetscFunctionBegin; 8389 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8390 free_used_adj = PETSC_FALSE; 8391 if (pcbddc->sub_schurs_layers == -1) { 8392 used_xadj = NULL; 8393 used_adjncy = NULL; 8394 } else { 8395 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8396 used_xadj = pcbddc->mat_graph->xadj; 8397 used_adjncy = pcbddc->mat_graph->adjncy; 8398 } else if (pcbddc->computed_rowadj) { 8399 used_xadj = pcbddc->mat_graph->xadj; 8400 used_adjncy = pcbddc->mat_graph->adjncy; 8401 } else { 8402 PetscBool flg_row=PETSC_FALSE; 8403 const PetscInt *xadj,*adjncy; 8404 PetscInt nvtxs; 8405 8406 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8407 if (flg_row) { 8408 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8409 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8410 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8411 free_used_adj = PETSC_TRUE; 8412 } else { 8413 pcbddc->sub_schurs_layers = -1; 8414 used_xadj = NULL; 8415 used_adjncy = NULL; 8416 } 8417 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8418 } 8419 } 8420 8421 /* setup sub_schurs data */ 8422 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8423 if (!sub_schurs->schur_explicit) { 8424 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8425 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8426 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8427 } else { 8428 Mat change = NULL; 8429 Vec scaling = NULL; 8430 IS change_primal = NULL, iP; 8431 PetscInt benign_n; 8432 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8433 PetscBool isseqaij,need_change = PETSC_FALSE; 8434 PetscBool discrete_harmonic = PETSC_FALSE; 8435 8436 if (!pcbddc->use_vertices && reuse_solvers) { 8437 PetscInt n_vertices; 8438 8439 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8440 reuse_solvers = (PetscBool)!n_vertices; 8441 } 8442 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8443 if (!isseqaij) { 8444 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8445 if (matis->A == pcbddc->local_mat) { 8446 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8447 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8448 } else { 8449 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8450 } 8451 } 8452 if (!pcbddc->benign_change_explicit) { 8453 benign_n = pcbddc->benign_n; 8454 } else { 8455 benign_n = 0; 8456 } 8457 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8458 We need a global reduction to avoid possible deadlocks. 8459 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8460 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8461 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8462 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8463 need_change = (PetscBool)(!need_change); 8464 } 8465 /* If the user defines additional constraints, we import them here. 8466 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8467 if (need_change) { 8468 PC_IS *pcisf; 8469 PC_BDDC *pcbddcf; 8470 PC pcf; 8471 8472 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8473 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8474 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8475 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8476 8477 /* hacks */ 8478 pcisf = (PC_IS*)pcf->data; 8479 pcisf->is_B_local = pcis->is_B_local; 8480 pcisf->vec1_N = pcis->vec1_N; 8481 pcisf->BtoNmap = pcis->BtoNmap; 8482 pcisf->n = pcis->n; 8483 pcisf->n_B = pcis->n_B; 8484 pcbddcf = (PC_BDDC*)pcf->data; 8485 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8486 pcbddcf->mat_graph = pcbddc->mat_graph; 8487 pcbddcf->use_faces = PETSC_TRUE; 8488 pcbddcf->use_change_of_basis = PETSC_TRUE; 8489 pcbddcf->use_change_on_faces = PETSC_TRUE; 8490 pcbddcf->use_qr_single = PETSC_TRUE; 8491 pcbddcf->fake_change = PETSC_TRUE; 8492 8493 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8494 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8495 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8496 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8497 change = pcbddcf->ConstraintMatrix; 8498 pcbddcf->ConstraintMatrix = NULL; 8499 8500 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8501 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8502 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8503 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8504 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8505 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8506 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8507 pcf->ops->destroy = NULL; 8508 pcf->ops->reset = NULL; 8509 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8510 } 8511 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8512 8513 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8514 if (iP) { 8515 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8516 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8517 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8518 } 8519 if (discrete_harmonic) { 8520 Mat A; 8521 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8522 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8523 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8524 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8525 ierr = MatDestroy(&A);CHKERRQ(ierr); 8526 } else { 8527 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8528 } 8529 ierr = MatDestroy(&change);CHKERRQ(ierr); 8530 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8531 } 8532 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8533 8534 /* free adjacency */ 8535 if (free_used_adj) { 8536 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8537 } 8538 PetscFunctionReturn(0); 8539 } 8540 8541 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8542 { 8543 PC_IS *pcis=(PC_IS*)pc->data; 8544 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8545 PCBDDCGraph graph; 8546 PetscErrorCode ierr; 8547 8548 PetscFunctionBegin; 8549 /* attach interface graph for determining subsets */ 8550 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8551 IS verticesIS,verticescomm; 8552 PetscInt vsize,*idxs; 8553 8554 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8555 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8556 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8557 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8558 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8559 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8560 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8561 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8562 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8563 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8564 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8565 } else { 8566 graph = pcbddc->mat_graph; 8567 } 8568 /* print some info */ 8569 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8570 IS vertices; 8571 PetscInt nv,nedges,nfaces; 8572 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8573 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8574 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8575 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8576 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8577 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8578 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8579 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8580 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8581 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8582 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8583 } 8584 8585 /* sub_schurs init */ 8586 if (!pcbddc->sub_schurs) { 8587 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8588 } 8589 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8590 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8591 8592 /* free graph struct */ 8593 if (pcbddc->sub_schurs_rebuild) { 8594 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8595 } 8596 PetscFunctionReturn(0); 8597 } 8598 8599 PetscErrorCode PCBDDCCheckOperator(PC pc) 8600 { 8601 PC_IS *pcis=(PC_IS*)pc->data; 8602 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8603 PetscErrorCode ierr; 8604 8605 PetscFunctionBegin; 8606 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8607 IS zerodiag = NULL; 8608 Mat S_j,B0_B=NULL; 8609 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8610 PetscScalar *p0_check,*array,*array2; 8611 PetscReal norm; 8612 PetscInt i; 8613 8614 /* B0 and B0_B */ 8615 if (zerodiag) { 8616 IS dummy; 8617 8618 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8619 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8620 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8621 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8622 } 8623 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8624 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8625 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8626 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8627 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8628 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8629 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8630 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8631 /* S_j */ 8632 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8633 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8634 8635 /* mimic vector in \widetilde{W}_\Gamma */ 8636 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8637 /* continuous in primal space */ 8638 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8639 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8640 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8641 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8642 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8643 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8644 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8645 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8646 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8647 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8648 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8649 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8650 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8651 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8652 8653 /* assemble rhs for coarse problem */ 8654 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8655 /* local with Schur */ 8656 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8657 if (zerodiag) { 8658 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8659 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8660 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8661 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8662 } 8663 /* sum on primal nodes the local contributions */ 8664 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8665 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8666 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8667 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8668 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8669 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8670 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8671 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8672 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8673 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8674 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8675 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8676 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8677 /* scale primal nodes (BDDC sums contibutions) */ 8678 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8679 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8680 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8681 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8682 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8683 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8684 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8685 /* global: \widetilde{B0}_B w_\Gamma */ 8686 if (zerodiag) { 8687 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8688 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8689 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8690 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8691 } 8692 /* BDDC */ 8693 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8694 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8695 8696 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8697 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8698 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8699 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8700 for (i=0;i<pcbddc->benign_n;i++) { 8701 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8702 } 8703 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8704 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8705 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8706 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8707 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8708 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8709 } 8710 PetscFunctionReturn(0); 8711 } 8712 8713 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8714 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8715 { 8716 Mat At; 8717 IS rows; 8718 PetscInt rst,ren; 8719 PetscErrorCode ierr; 8720 PetscLayout rmap; 8721 8722 PetscFunctionBegin; 8723 rst = ren = 0; 8724 if (ccomm != MPI_COMM_NULL) { 8725 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8726 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8727 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8728 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8729 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8730 } 8731 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8732 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8733 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8734 8735 if (ccomm != MPI_COMM_NULL) { 8736 Mat_MPIAIJ *a,*b; 8737 IS from,to; 8738 Vec gvec; 8739 PetscInt lsize; 8740 8741 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8742 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8743 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8744 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8745 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8746 a = (Mat_MPIAIJ*)At->data; 8747 b = (Mat_MPIAIJ*)(*B)->data; 8748 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8749 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8750 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8751 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8752 b->A = a->A; 8753 b->B = a->B; 8754 8755 b->donotstash = a->donotstash; 8756 b->roworiented = a->roworiented; 8757 b->rowindices = 0; 8758 b->rowvalues = 0; 8759 b->getrowactive = PETSC_FALSE; 8760 8761 (*B)->rmap = rmap; 8762 (*B)->factortype = A->factortype; 8763 (*B)->assembled = PETSC_TRUE; 8764 (*B)->insertmode = NOT_SET_VALUES; 8765 (*B)->preallocated = PETSC_TRUE; 8766 8767 if (a->colmap) { 8768 #if defined(PETSC_USE_CTABLE) 8769 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8770 #else 8771 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8772 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8773 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8774 #endif 8775 } else b->colmap = 0; 8776 if (a->garray) { 8777 PetscInt len; 8778 len = a->B->cmap->n; 8779 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8780 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8781 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8782 } else b->garray = 0; 8783 8784 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8785 b->lvec = a->lvec; 8786 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8787 8788 /* cannot use VecScatterCopy */ 8789 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8790 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8791 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8792 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8793 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8794 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8795 ierr = ISDestroy(&from);CHKERRQ(ierr); 8796 ierr = ISDestroy(&to);CHKERRQ(ierr); 8797 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8798 } 8799 ierr = MatDestroy(&At);CHKERRQ(ierr); 8800 PetscFunctionReturn(0); 8801 } 8802