1 #include <../src/mat/impls/aij/seq/aij.h> 2 #include <../src/ksp/pc/impls/bddc/bddc.h> 3 #include <../src/ksp/pc/impls/bddc/bddcprivate.h> 4 #include <petscdmplex.h> 5 #include <petscblaslapack.h> 6 #include <petsc/private/sfimpl.h> 7 #include <petsc/private/dmpleximpl.h> 8 #include <petscdmda.h> 9 10 static PetscErrorCode MatMPIAIJRestrict(Mat,MPI_Comm,Mat*); 11 12 /* if range is true, it returns B s.t. span{B} = range(A) 13 if range is false, it returns B s.t. range(B) _|_ range(A) */ 14 PetscErrorCode MatDenseOrthogonalRangeOrComplement(Mat A, PetscBool range, PetscInt lw, PetscScalar *work, PetscReal *rwork, Mat *B) 15 { 16 #if !defined(PETSC_USE_COMPLEX) 17 PetscScalar *uwork,*data,*U, ds = 0.; 18 PetscReal *sing; 19 PetscBLASInt bM,bN,lwork,lierr,di = 1; 20 PetscInt ulw,i,nr,nc,n; 21 PetscErrorCode ierr; 22 23 PetscFunctionBegin; 24 #if defined(PETSC_MISSING_LAPACK_GESVD) 25 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"LAPACK _GESVD not available"); 26 #else 27 ierr = MatGetSize(A,&nr,&nc);CHKERRQ(ierr); 28 if (!nr || !nc) PetscFunctionReturn(0); 29 30 /* workspace */ 31 if (!work) { 32 ulw = PetscMax(PetscMax(1,5*PetscMin(nr,nc)),3*PetscMin(nr,nc)+PetscMax(nr,nc)); 33 ierr = PetscMalloc1(ulw,&uwork);CHKERRQ(ierr); 34 } else { 35 ulw = lw; 36 uwork = work; 37 } 38 n = PetscMin(nr,nc); 39 if (!rwork) { 40 ierr = PetscMalloc1(n,&sing);CHKERRQ(ierr); 41 } else { 42 sing = rwork; 43 } 44 45 /* SVD */ 46 ierr = PetscMalloc1(nr*nr,&U);CHKERRQ(ierr); 47 ierr = PetscBLASIntCast(nr,&bM);CHKERRQ(ierr); 48 ierr = PetscBLASIntCast(nc,&bN);CHKERRQ(ierr); 49 ierr = PetscBLASIntCast(ulw,&lwork);CHKERRQ(ierr); 50 ierr = MatDenseGetArray(A,&data);CHKERRQ(ierr); 51 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 52 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("A","N",&bM,&bN,data,&bM,sing,U,&bM,&ds,&di,uwork,&lwork,&lierr)); 53 ierr = PetscFPTrapPop();CHKERRQ(ierr); 54 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 55 ierr = MatDenseRestoreArray(A,&data);CHKERRQ(ierr); 56 for (i=0;i<n;i++) if (sing[i] < PETSC_SMALL) break; 57 if (!rwork) { 58 ierr = PetscFree(sing);CHKERRQ(ierr); 59 } 60 if (!work) { 61 ierr = PetscFree(uwork);CHKERRQ(ierr); 62 } 63 /* create B */ 64 if (!range) { 65 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,nr-i,NULL,B);CHKERRQ(ierr); 66 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 67 ierr = PetscMemcpy(data,U+nr*i,(nr-i)*nr*sizeof(PetscScalar));CHKERRQ(ierr); 68 } else { 69 ierr = MatCreateSeqDense(PETSC_COMM_SELF,nr,i,NULL,B);CHKERRQ(ierr); 70 ierr = MatDenseGetArray(*B,&data);CHKERRQ(ierr); 71 ierr = PetscMemcpy(data,U,i*nr*sizeof(PetscScalar));CHKERRQ(ierr); 72 } 73 ierr = MatDenseRestoreArray(*B,&data);CHKERRQ(ierr); 74 ierr = PetscFree(U);CHKERRQ(ierr); 75 #endif 76 #else /* PETSC_USE_COMPLEX */ 77 PetscFunctionBegin; 78 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented for complexes"); 79 #endif 80 PetscFunctionReturn(0); 81 } 82 83 /* TODO REMOVE */ 84 #if defined(PRINT_GDET) 85 static int inc = 0; 86 static int lev = 0; 87 #endif 88 89 PetscErrorCode PCBDDCComputeNedelecChangeEdge(Mat lG, IS edge, IS extrow, IS extcol, IS corners, Mat* Gins, Mat* GKins, PetscScalar cvals[2], PetscScalar *work, PetscReal *rwork) 90 { 91 PetscErrorCode ierr; 92 Mat GE,GEd; 93 PetscInt rsize,csize,esize; 94 PetscScalar *ptr; 95 96 PetscFunctionBegin; 97 ierr = ISGetSize(edge,&esize);CHKERRQ(ierr); 98 if (!esize) PetscFunctionReturn(0); 99 ierr = ISGetSize(extrow,&rsize);CHKERRQ(ierr); 100 ierr = ISGetSize(extcol,&csize);CHKERRQ(ierr); 101 102 /* gradients */ 103 ptr = work + 5*esize; 104 ierr = MatCreateSubMatrix(lG,extrow,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 105 ierr = MatCreateSeqDense(PETSC_COMM_SELF,rsize,csize,ptr,Gins);CHKERRQ(ierr); 106 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,Gins);CHKERRQ(ierr); 107 ierr = MatDestroy(&GE);CHKERRQ(ierr); 108 109 /* constants */ 110 ptr += rsize*csize; 111 ierr = MatCreateSeqDense(PETSC_COMM_SELF,esize,csize,ptr,&GEd);CHKERRQ(ierr); 112 ierr = MatCreateSubMatrix(lG,edge,extcol,MAT_INITIAL_MATRIX,&GE);CHKERRQ(ierr); 113 ierr = MatConvert(GE,MATSEQDENSE,MAT_REUSE_MATRIX,&GEd);CHKERRQ(ierr); 114 ierr = MatDestroy(&GE);CHKERRQ(ierr); 115 ierr = MatDenseOrthogonalRangeOrComplement(GEd,PETSC_FALSE,5*esize,work,rwork,GKins);CHKERRQ(ierr); 116 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 117 118 if (corners) { 119 Mat GEc; 120 PetscScalar *vals,v; 121 122 ierr = MatCreateSubMatrix(lG,edge,corners,MAT_INITIAL_MATRIX,&GEc);CHKERRQ(ierr); 123 ierr = MatTransposeMatMult(GEc,*GKins,MAT_INITIAL_MATRIX,1.0,&GEd);CHKERRQ(ierr); 124 ierr = MatDenseGetArray(GEd,&vals);CHKERRQ(ierr); 125 /* v = PetscAbsScalar(vals[0]) */; 126 v = 1.; 127 cvals[0] = vals[0]/v; 128 cvals[1] = vals[1]/v; 129 ierr = MatDenseRestoreArray(GEd,&vals);CHKERRQ(ierr); 130 ierr = MatScale(*GKins,1./v);CHKERRQ(ierr); 131 #if defined(PRINT_GDET) 132 { 133 PetscViewer viewer; 134 char filename[256]; 135 sprintf(filename,"Gdet_l%d_r%d_cc%d.m",lev,PetscGlobalRank,inc++); 136 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 137 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 138 ierr = PetscObjectSetName((PetscObject)GEc,"GEc");CHKERRQ(ierr); 139 ierr = MatView(GEc,viewer);CHKERRQ(ierr); 140 ierr = PetscObjectSetName((PetscObject)(*GKins),"GK");CHKERRQ(ierr); 141 ierr = MatView(*GKins,viewer);CHKERRQ(ierr); 142 ierr = PetscObjectSetName((PetscObject)GEd,"Gproj");CHKERRQ(ierr); 143 ierr = MatView(GEd,viewer);CHKERRQ(ierr); 144 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 145 } 146 #endif 147 ierr = MatDestroy(&GEd);CHKERRQ(ierr); 148 ierr = MatDestroy(&GEc);CHKERRQ(ierr); 149 } 150 151 PetscFunctionReturn(0); 152 } 153 154 PetscErrorCode PCBDDCNedelecSupport(PC pc) 155 { 156 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 157 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 158 Mat G,T,conn,lG,lGt,lGis,lGall,lGe,lGinit; 159 Vec tvec; 160 PetscSF sfv; 161 ISLocalToGlobalMapping el2g,vl2g,fl2g,al2g; 162 MPI_Comm comm; 163 IS lned,primals,allprimals,nedfieldlocal; 164 IS *eedges,*extrows,*extcols,*alleedges; 165 PetscBT btv,bte,btvc,btb,btbd,btvcand,btvi,btee,bter; 166 PetscScalar *vals,*work; 167 PetscReal *rwork; 168 const PetscInt *idxs,*ii,*jj,*iit,*jjt; 169 PetscInt ne,nv,Lv,order,n,field; 170 PetscInt n_neigh,*neigh,*n_shared,**shared; 171 PetscInt i,j,extmem,cum,maxsize,nee; 172 PetscInt *extrow,*extrowcum,*marks,*vmarks,*gidxs; 173 PetscInt *sfvleaves,*sfvroots; 174 PetscInt *corners,*cedges; 175 PetscInt *ecount,**eneighs,*vcount,**vneighs; 176 #if defined(PETSC_USE_DEBUG) 177 PetscInt *emarks; 178 #endif 179 PetscBool print,eerr,done,lrc[2],conforming,global,singular,setprimal; 180 PetscErrorCode ierr; 181 182 PetscFunctionBegin; 183 /* If the discrete gradient is defined for a subset of dofs and global is true, 184 it assumes G is given in global ordering for all the dofs. 185 Otherwise, the ordering is global for the Nedelec field */ 186 order = pcbddc->nedorder; 187 conforming = pcbddc->conforming; 188 field = pcbddc->nedfield; 189 global = pcbddc->nedglobal; 190 setprimal = PETSC_FALSE; 191 print = PETSC_FALSE; 192 singular = PETSC_FALSE; 193 194 /* Command line customization */ 195 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC Nedelec options","PC");CHKERRQ(ierr); 196 ierr = PetscOptionsBool("-pc_bddc_nedelec_field_primal","All edge dofs set as primals: Toselli's algorithm C",NULL,setprimal,&setprimal,NULL);CHKERRQ(ierr); 197 ierr = PetscOptionsBool("-pc_bddc_nedelec_singular","Infer nullspace from discrete gradient",NULL,singular,&singular,NULL);CHKERRQ(ierr); 198 ierr = PetscOptionsInt("-pc_bddc_nedelec_order","Test variable order code (to be removed)",NULL,order,&order,NULL);CHKERRQ(ierr); 199 /* print debug info TODO: to be removed */ 200 ierr = PetscOptionsBool("-pc_bddc_nedelec_print","Print debug info",NULL,print,&print,NULL);CHKERRQ(ierr); 201 ierr = PetscOptionsEnd();CHKERRQ(ierr); 202 203 /* Return if there are no edges in the decomposition and the problem is not singular */ 204 ierr = MatGetLocalToGlobalMapping(pc->pmat,&al2g,NULL);CHKERRQ(ierr); 205 ierr = ISLocalToGlobalMappingGetSize(al2g,&n);CHKERRQ(ierr); 206 ierr = PetscObjectGetComm((PetscObject)pc,&comm);CHKERRQ(ierr); 207 if (!singular) { 208 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 209 lrc[0] = PETSC_FALSE; 210 for (i=0;i<n;i++) { 211 if (PetscRealPart(vals[i]) > 2.) { 212 lrc[0] = PETSC_TRUE; 213 break; 214 } 215 } 216 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 217 ierr = MPIU_Allreduce(&lrc[0],&lrc[1],1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 218 if (!lrc[1]) PetscFunctionReturn(0); 219 } 220 221 /* Get Nedelec field */ 222 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 223 if (pcbddc->n_ISForDofsLocal && field >= pcbddc->n_ISForDofsLocal) SETERRQ2(comm,PETSC_ERR_USER,"Invalid field for Nedelec %d: number of fields is %d",field,pcbddc->n_ISForDofsLocal); 224 if (pcbddc->n_ISForDofsLocal && field >= 0) { 225 ierr = PetscObjectReference((PetscObject)pcbddc->ISForDofsLocal[field]);CHKERRQ(ierr); 226 nedfieldlocal = pcbddc->ISForDofsLocal[field]; 227 ierr = ISGetLocalSize(nedfieldlocal,&ne);CHKERRQ(ierr); 228 } else if (!pcbddc->n_ISForDofsLocal && field != PETSC_DECIDE) { 229 ne = n; 230 nedfieldlocal = NULL; 231 global = PETSC_TRUE; 232 } else if (field == PETSC_DECIDE) { 233 PetscInt rst,ren,*idx; 234 235 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 236 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 237 ierr = MatGetOwnershipRange(pcbddc->discretegradient,&rst,&ren);CHKERRQ(ierr); 238 for (i=rst;i<ren;i++) { 239 PetscInt nc; 240 241 ierr = MatGetRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 242 if (nc > 1) matis->sf_rootdata[i-rst] = 1; 243 ierr = MatRestoreRow(pcbddc->discretegradient,i,&nc,NULL,NULL);CHKERRQ(ierr); 244 } 245 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 246 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 247 ierr = PetscMalloc1(n,&idx);CHKERRQ(ierr); 248 for (i=0,ne=0;i<n;i++) if (matis->sf_leafdata[i]) idx[ne++] = i; 249 ierr = ISCreateGeneral(comm,ne,idx,PETSC_OWN_POINTER,&nedfieldlocal);CHKERRQ(ierr); 250 } else { 251 SETERRQ(comm,PETSC_ERR_USER,"When multiple fields are present, the Nedelec field has to be specified"); 252 } 253 254 /* Sanity checks */ 255 if (!order && !conforming) SETERRQ(comm,PETSC_ERR_SUP,"Variable order and non-conforming spaces are not supported at the same time"); 256 if (pcbddc->user_ChangeOfBasisMatrix) SETERRQ(comm,PETSC_ERR_SUP,"Cannot generate Nedelec support with user defined change of basis"); 257 if (order && ne%order) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"The number of local edge dofs %d it's not a multiple of the order %d",ne,order); 258 259 /* Just set primal dofs and return */ 260 if (setprimal) { 261 IS enedfieldlocal; 262 PetscInt *eidxs; 263 264 ierr = PetscMalloc1(ne,&eidxs);CHKERRQ(ierr); 265 ierr = VecGetArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 266 if (nedfieldlocal) { 267 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 268 for (i=0,cum=0;i<ne;i++) { 269 if (PetscRealPart(vals[idxs[i]]) > 2.) { 270 eidxs[cum++] = idxs[i]; 271 } 272 } 273 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 274 } else { 275 for (i=0,cum=0;i<ne;i++) { 276 if (PetscRealPart(vals[i]) > 2.) { 277 eidxs[cum++] = i; 278 } 279 } 280 } 281 ierr = VecRestoreArrayRead(matis->counter,(const PetscScalar**)&vals);CHKERRQ(ierr); 282 ierr = ISCreateGeneral(comm,cum,eidxs,PETSC_COPY_VALUES,&enedfieldlocal);CHKERRQ(ierr); 283 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,enedfieldlocal);CHKERRQ(ierr); 284 ierr = PetscFree(eidxs);CHKERRQ(ierr); 285 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 286 ierr = ISDestroy(&enedfieldlocal);CHKERRQ(ierr); 287 PetscFunctionReturn(0); 288 } 289 290 /* Compute some l2g maps */ 291 if (nedfieldlocal) { 292 IS is; 293 294 /* need to map from the local Nedelec field to local numbering */ 295 ierr = ISLocalToGlobalMappingCreateIS(nedfieldlocal,&fl2g);CHKERRQ(ierr); 296 /* need to map from the local Nedelec field to global numbering for the whole dofs*/ 297 ierr = ISLocalToGlobalMappingApplyIS(al2g,nedfieldlocal,&is);CHKERRQ(ierr); 298 ierr = ISLocalToGlobalMappingCreateIS(is,&al2g);CHKERRQ(ierr); 299 /* need to map from the local Nedelec field to global numbering (for Nedelec only) */ 300 if (global) { 301 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 302 el2g = al2g; 303 } else { 304 IS gis; 305 306 ierr = ISRenumber(is,NULL,NULL,&gis);CHKERRQ(ierr); 307 ierr = ISLocalToGlobalMappingCreateIS(gis,&el2g);CHKERRQ(ierr); 308 ierr = ISDestroy(&gis);CHKERRQ(ierr); 309 } 310 ierr = ISDestroy(&is);CHKERRQ(ierr); 311 } else { 312 /* restore default */ 313 pcbddc->nedfield = -1; 314 /* one ref for the destruction of al2g, one for el2g */ 315 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 316 ierr = PetscObjectReference((PetscObject)al2g);CHKERRQ(ierr); 317 el2g = al2g; 318 fl2g = NULL; 319 } 320 321 /* Start communication to drop connections for interior edges (for cc analysis only) */ 322 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscInt));CHKERRQ(ierr); 323 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscInt));CHKERRQ(ierr); 324 if (nedfieldlocal) { 325 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 326 for (i=0;i<ne;i++) matis->sf_leafdata[idxs[i]] = 1; 327 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 328 } else { 329 for (i=0;i<ne;i++) matis->sf_leafdata[i] = 1; 330 } 331 ierr = PetscSFReduceBegin(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 332 ierr = PetscSFReduceEnd(matis->sf,MPIU_INT,matis->sf_leafdata,matis->sf_rootdata,MPI_SUM);CHKERRQ(ierr); 333 334 if (!singular) { /* drop connections with interior edges to avoid unneeded communications and memory movements */ 335 ierr = MatDuplicate(pcbddc->discretegradient,MAT_COPY_VALUES,&G);CHKERRQ(ierr); 336 ierr = MatSetOption(G,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 337 if (global) { 338 PetscInt rst; 339 340 ierr = MatGetOwnershipRange(G,&rst,NULL);CHKERRQ(ierr); 341 for (i=0,cum=0;i<pc->pmat->rmap->n;i++) { 342 if (matis->sf_rootdata[i] < 2) { 343 matis->sf_rootdata[cum++] = i + rst; 344 } 345 } 346 ierr = MatSetOption(G,MAT_NO_OFF_PROC_ZERO_ROWS,PETSC_TRUE);CHKERRQ(ierr); 347 ierr = MatZeroRows(G,cum,matis->sf_rootdata,0.,NULL,NULL);CHKERRQ(ierr); 348 } else { 349 PetscInt *tbz; 350 351 ierr = PetscMalloc1(ne,&tbz);CHKERRQ(ierr); 352 ierr = PetscSFBcastBegin(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 353 ierr = PetscSFBcastEnd(matis->sf,MPIU_INT,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 354 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 355 for (i=0,cum=0;i<ne;i++) 356 if (matis->sf_leafdata[idxs[i]] == 1) 357 tbz[cum++] = i; 358 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 359 ierr = ISLocalToGlobalMappingApply(el2g,cum,tbz,tbz);CHKERRQ(ierr); 360 ierr = MatZeroRows(G,cum,tbz,0.,NULL,NULL);CHKERRQ(ierr); 361 ierr = PetscFree(tbz);CHKERRQ(ierr); 362 } 363 } else { /* we need the entire G to infer the nullspace */ 364 ierr = PetscObjectReference((PetscObject)pcbddc->discretegradient);CHKERRQ(ierr); 365 G = pcbddc->discretegradient; 366 } 367 368 /* Extract subdomain relevant rows of G */ 369 ierr = ISLocalToGlobalMappingGetIndices(el2g,&idxs);CHKERRQ(ierr); 370 ierr = ISCreateGeneral(comm,ne,idxs,PETSC_USE_POINTER,&lned);CHKERRQ(ierr); 371 ierr = MatCreateSubMatrix(G,lned,NULL,MAT_INITIAL_MATRIX,&lGall);CHKERRQ(ierr); 372 ierr = ISLocalToGlobalMappingRestoreIndices(el2g,&idxs);CHKERRQ(ierr); 373 ierr = ISDestroy(&lned);CHKERRQ(ierr); 374 ierr = MatConvert(lGall,MATIS,MAT_INITIAL_MATRIX,&lGis);CHKERRQ(ierr); 375 ierr = MatDestroy(&lGall);CHKERRQ(ierr); 376 ierr = MatISGetLocalMat(lGis,&lG);CHKERRQ(ierr); 377 378 /* SF for nodal dofs communications */ 379 ierr = MatGetLocalSize(G,NULL,&Lv);CHKERRQ(ierr); 380 ierr = MatGetLocalToGlobalMapping(lGis,NULL,&vl2g);CHKERRQ(ierr); 381 ierr = PetscObjectReference((PetscObject)vl2g);CHKERRQ(ierr); 382 ierr = ISLocalToGlobalMappingGetSize(vl2g,&nv);CHKERRQ(ierr); 383 ierr = PetscSFCreate(comm,&sfv);CHKERRQ(ierr); 384 ierr = ISLocalToGlobalMappingGetIndices(vl2g,&idxs);CHKERRQ(ierr); 385 ierr = PetscSFSetGraphLayout(sfv,lGis->cmap,nv,NULL,PETSC_OWN_POINTER,idxs);CHKERRQ(ierr); 386 ierr = ISLocalToGlobalMappingRestoreIndices(vl2g,&idxs);CHKERRQ(ierr); 387 i = singular ? 2 : 1; 388 ierr = PetscMalloc2(i*nv,&sfvleaves,i*Lv,&sfvroots);CHKERRQ(ierr); 389 390 /* Destroy temporary G created in MATIS format and modified G */ 391 ierr = PetscObjectReference((PetscObject)lG);CHKERRQ(ierr); 392 ierr = MatDestroy(&lGis);CHKERRQ(ierr); 393 ierr = MatDestroy(&G);CHKERRQ(ierr); 394 395 if (print) { 396 ierr = PetscObjectSetName((PetscObject)lG,"initial_lG");CHKERRQ(ierr); 397 ierr = MatView(lG,NULL);CHKERRQ(ierr); 398 } 399 400 /* Save lG for values insertion in change of basis */ 401 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGinit);CHKERRQ(ierr); 402 403 /* Analyze the edge-nodes connections (duplicate lG) */ 404 ierr = MatDuplicate(lG,MAT_COPY_VALUES,&lGe);CHKERRQ(ierr); 405 ierr = MatSetOption(lGe,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 406 ierr = PetscBTCreate(nv,&btv);CHKERRQ(ierr); 407 ierr = PetscBTCreate(ne,&bte);CHKERRQ(ierr); 408 ierr = PetscBTCreate(ne,&btb);CHKERRQ(ierr); 409 ierr = PetscBTCreate(ne,&btbd);CHKERRQ(ierr); 410 ierr = PetscBTCreate(nv,&btvcand);CHKERRQ(ierr); 411 /* need to import the boundary specification to ensure the 412 proper detection of coarse edges' endpoints */ 413 if (pcbddc->DirichletBoundariesLocal) { 414 IS is; 415 416 if (fl2g) { 417 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->DirichletBoundariesLocal,&is);CHKERRQ(ierr); 418 } else { 419 is = pcbddc->DirichletBoundariesLocal; 420 } 421 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 422 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 423 for (i=0;i<cum;i++) { 424 if (idxs[i] >= 0) { 425 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 426 ierr = PetscBTSet(btbd,idxs[i]);CHKERRQ(ierr); 427 } 428 } 429 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 430 if (fl2g) { 431 ierr = ISDestroy(&is);CHKERRQ(ierr); 432 } 433 } 434 if (pcbddc->NeumannBoundariesLocal) { 435 IS is; 436 437 if (fl2g) { 438 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_MASK,pcbddc->NeumannBoundariesLocal,&is);CHKERRQ(ierr); 439 } else { 440 is = pcbddc->NeumannBoundariesLocal; 441 } 442 ierr = ISGetLocalSize(is,&cum);CHKERRQ(ierr); 443 ierr = ISGetIndices(is,&idxs);CHKERRQ(ierr); 444 for (i=0;i<cum;i++) { 445 if (idxs[i] >= 0) { 446 ierr = PetscBTSet(btb,idxs[i]);CHKERRQ(ierr); 447 } 448 } 449 ierr = ISRestoreIndices(is,&idxs);CHKERRQ(ierr); 450 if (fl2g) { 451 ierr = ISDestroy(&is);CHKERRQ(ierr); 452 } 453 } 454 455 /* Count neighs per dof */ 456 ierr = PetscCalloc1(ne,&ecount);CHKERRQ(ierr); 457 ierr = PetscMalloc1(ne,&eneighs);CHKERRQ(ierr); 458 ierr = ISLocalToGlobalMappingGetInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 459 for (i=1,cum=0;i<n_neigh;i++) { 460 cum += n_shared[i]; 461 for (j=0;j<n_shared[i];j++) { 462 ecount[shared[i][j]]++; 463 } 464 } 465 if (ne) { 466 ierr = PetscMalloc1(cum,&eneighs[0]);CHKERRQ(ierr); 467 } 468 for (i=1;i<ne;i++) eneighs[i] = eneighs[i-1] + ecount[i-1]; 469 ierr = PetscMemzero(ecount,ne*sizeof(PetscInt));CHKERRQ(ierr); 470 for (i=1;i<n_neigh;i++) { 471 for (j=0;j<n_shared[i];j++) { 472 PetscInt k = shared[i][j]; 473 eneighs[k][ecount[k]] = neigh[i]; 474 ecount[k]++; 475 } 476 } 477 for (i=0;i<ne;i++) { 478 ierr = PetscSortRemoveDupsInt(&ecount[i],eneighs[i]);CHKERRQ(ierr); 479 } 480 ierr = ISLocalToGlobalMappingRestoreInfo(el2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 481 ierr = PetscCalloc1(nv,&vcount);CHKERRQ(ierr); 482 ierr = PetscMalloc1(nv,&vneighs);CHKERRQ(ierr); 483 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 484 for (i=1,cum=0;i<n_neigh;i++) { 485 cum += n_shared[i]; 486 for (j=0;j<n_shared[i];j++) { 487 vcount[shared[i][j]]++; 488 } 489 } 490 if (nv) { 491 ierr = PetscMalloc1(cum,&vneighs[0]);CHKERRQ(ierr); 492 } 493 for (i=1;i<nv;i++) vneighs[i] = vneighs[i-1] + vcount[i-1]; 494 ierr = PetscMemzero(vcount,nv*sizeof(PetscInt));CHKERRQ(ierr); 495 for (i=1;i<n_neigh;i++) { 496 for (j=0;j<n_shared[i];j++) { 497 PetscInt k = shared[i][j]; 498 vneighs[k][vcount[k]] = neigh[i]; 499 vcount[k]++; 500 } 501 } 502 for (i=0;i<nv;i++) { 503 ierr = PetscSortRemoveDupsInt(&vcount[i],vneighs[i]);CHKERRQ(ierr); 504 } 505 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 506 507 /* need to remove coarse faces' dofs and coarse edges' dirichlet dofs 508 for proper detection of coarse edges' endpoints */ 509 ierr = PetscBTCreate(ne,&btee);CHKERRQ(ierr); 510 for (i=0;i<ne;i++) { 511 if ((ecount[i] > 1 && !PetscBTLookup(btbd,i)) || (ecount[i] == 1 && PetscBTLookup(btb,i))) { 512 ierr = PetscBTSet(btee,i);CHKERRQ(ierr); 513 } 514 } 515 ierr = PetscMalloc1(ne,&marks);CHKERRQ(ierr); 516 if (!conforming) { 517 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 518 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 519 } 520 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 521 ierr = MatSeqAIJGetArray(lGe,&vals);CHKERRQ(ierr); 522 cum = 0; 523 for (i=0;i<ne;i++) { 524 /* eliminate rows corresponding to edge dofs belonging to coarse faces */ 525 if (!PetscBTLookup(btee,i)) { 526 marks[cum++] = i; 527 continue; 528 } 529 /* set badly connected edge dofs as primal */ 530 if (!conforming) { 531 if (ii[i+1]-ii[i] != order + 1) { /* every row of G on the coarse edge should list order+1 nodal dofs */ 532 marks[cum++] = i; 533 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 534 for (j=ii[i];j<ii[i+1];j++) { 535 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 536 } 537 } else { 538 /* every edge dofs should be connected trough a certain number of nodal dofs 539 to other edge dofs belonging to coarse edges 540 - at most 2 endpoints 541 - order-1 interior nodal dofs 542 - no undefined nodal dofs (nconn < order) 543 */ 544 PetscInt ends = 0,ints = 0, undef = 0; 545 for (j=ii[i];j<ii[i+1];j++) { 546 PetscInt v = jj[j],k; 547 PetscInt nconn = iit[v+1]-iit[v]; 548 for (k=iit[v];k<iit[v+1];k++) if (!PetscBTLookup(btee,jjt[k])) nconn--; 549 if (nconn > order) ends++; 550 else if (nconn == order) ints++; 551 else undef++; 552 } 553 if (undef || ends > 2 || ints != order -1) { 554 marks[cum++] = i; 555 ierr = PetscBTSet(bte,i);CHKERRQ(ierr); 556 for (j=ii[i];j<ii[i+1];j++) { 557 ierr = PetscBTSet(btv,jj[j]);CHKERRQ(ierr); 558 } 559 } 560 } 561 } 562 /* We assume the order on the element edge is ii[i+1]-ii[i]-1 */ 563 if (!order && ii[i+1] != ii[i]) { 564 PetscScalar val = 1./(ii[i+1]-ii[i]-1); 565 for (j=ii[i];j<ii[i+1];j++) vals[j] = val; 566 } 567 } 568 ierr = PetscBTDestroy(&btee);CHKERRQ(ierr); 569 ierr = MatSeqAIJRestoreArray(lGe,&vals);CHKERRQ(ierr); 570 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 571 if (!conforming) { 572 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 573 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 574 } 575 ierr = MatZeroRows(lGe,cum,marks,0.,NULL,NULL);CHKERRQ(ierr); 576 577 /* identify splitpoints and corner candidates */ 578 ierr = MatTranspose(lGe,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 579 if (print) { 580 ierr = PetscObjectSetName((PetscObject)lGe,"edgerestr_lG");CHKERRQ(ierr); 581 ierr = MatView(lGe,NULL);CHKERRQ(ierr); 582 ierr = PetscObjectSetName((PetscObject)lGt,"edgerestr_lGt");CHKERRQ(ierr); 583 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 584 } 585 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 586 ierr = MatSeqAIJGetArray(lGt,&vals);CHKERRQ(ierr); 587 for (i=0;i<nv;i++) { 588 PetscInt ord = order, test = ii[i+1]-ii[i], vc = vcount[i]; 589 PetscBool sneighs = PETSC_TRUE, bdir = PETSC_FALSE; 590 if (!order) { /* variable order */ 591 PetscReal vorder = 0.; 592 593 for (j=ii[i];j<ii[i+1];j++) vorder += PetscRealPart(vals[j]); 594 test = PetscFloorReal(vorder+10.*PETSC_SQRT_MACHINE_EPSILON); 595 if (vorder-test > PETSC_SQRT_MACHINE_EPSILON) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected value for vorder: %g (%d)",vorder,test); 596 ord = 1; 597 } 598 #if defined(PETSC_USE_DEBUG) 599 if (test%ord) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected number of edge dofs %d connected with nodal dof %d with order %d",test,i,ord); 600 #endif 601 for (j=ii[i];j<ii[i+1] && sneighs;j++) { 602 if (PetscBTLookup(btbd,jj[j])) { 603 bdir = PETSC_TRUE; 604 break; 605 } 606 if (vc != ecount[jj[j]]) { 607 sneighs = PETSC_FALSE; 608 } else { 609 PetscInt k,*vn = vneighs[i], *en = eneighs[jj[j]]; 610 for (k=0;k<vc;k++) { 611 if (vn[k] != en[k]) { 612 sneighs = PETSC_FALSE; 613 break; 614 } 615 } 616 } 617 } 618 if (!sneighs || test >= 3*ord || bdir) { /* splitpoints */ 619 if (print) PetscPrintf(PETSC_COMM_SELF,"SPLITPOINT %d (%d %d %d)\n",i,!sneighs,test >= 3*ord,bdir); 620 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 621 } else if (test == ord) { 622 if (order == 1 || (!order && ii[i+1]-ii[i] == 1)) { 623 if (print) PetscPrintf(PETSC_COMM_SELF,"ENDPOINT %d\n",i); 624 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 625 } else { 626 if (print) PetscPrintf(PETSC_COMM_SELF,"CORNER CANDIDATE %d\n",i); 627 ierr = PetscBTSet(btvcand,i);CHKERRQ(ierr); 628 } 629 } 630 } 631 ierr = PetscFree(ecount);CHKERRQ(ierr); 632 ierr = PetscFree(vcount);CHKERRQ(ierr); 633 if (ne) { 634 ierr = PetscFree(eneighs[0]);CHKERRQ(ierr); 635 } 636 if (nv) { 637 ierr = PetscFree(vneighs[0]);CHKERRQ(ierr); 638 } 639 ierr = PetscFree(eneighs);CHKERRQ(ierr); 640 ierr = PetscFree(vneighs);CHKERRQ(ierr); 641 ierr = PetscBTDestroy(&btbd);CHKERRQ(ierr); 642 643 /* a candidate is valid if it is connected to another candidate via a non-primal edge dof */ 644 if (order != 1) { 645 if (print) PetscPrintf(PETSC_COMM_SELF,"INSPECTING CANDIDATES\n"); 646 ierr = MatGetRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 647 for (i=0;i<nv;i++) { 648 if (PetscBTLookup(btvcand,i)) { 649 PetscBool found = PETSC_FALSE; 650 for (j=ii[i];j<ii[i+1] && !found;j++) { 651 PetscInt k,e = jj[j]; 652 if (PetscBTLookup(bte,e)) continue; 653 for (k=iit[e];k<iit[e+1];k++) { 654 PetscInt v = jjt[k]; 655 if (v != i && PetscBTLookup(btvcand,v)) { 656 found = PETSC_TRUE; 657 break; 658 } 659 } 660 } 661 if (!found) { 662 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d CLEARED\n",i); 663 ierr = PetscBTClear(btvcand,i);CHKERRQ(ierr); 664 } else { 665 if (print) PetscPrintf(PETSC_COMM_SELF," CANDIDATE %d ACCEPTED\n",i); 666 } 667 } 668 } 669 ierr = MatRestoreRowIJ(lGe,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 670 } 671 ierr = MatSeqAIJRestoreArray(lGt,&vals);CHKERRQ(ierr); 672 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 673 ierr = MatDestroy(&lGe);CHKERRQ(ierr); 674 675 /* Get the local G^T explicitly */ 676 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 677 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 678 ierr = MatSetOption(lGt,MAT_KEEP_NONZERO_PATTERN,PETSC_FALSE);CHKERRQ(ierr); 679 680 /* Mark interior nodal dofs */ 681 ierr = ISLocalToGlobalMappingGetInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 682 ierr = PetscBTCreate(nv,&btvi);CHKERRQ(ierr); 683 for (i=1;i<n_neigh;i++) { 684 for (j=0;j<n_shared[i];j++) { 685 ierr = PetscBTSet(btvi,shared[i][j]);CHKERRQ(ierr); 686 } 687 } 688 ierr = ISLocalToGlobalMappingRestoreInfo(vl2g,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 689 690 /* communicate corners and splitpoints */ 691 ierr = PetscMalloc1(nv,&vmarks);CHKERRQ(ierr); 692 ierr = PetscMemzero(sfvleaves,nv*sizeof(PetscInt));CHKERRQ(ierr); 693 ierr = PetscMemzero(sfvroots,Lv*sizeof(PetscInt));CHKERRQ(ierr); 694 for (i=0;i<nv;i++) if (PetscUnlikely(PetscBTLookup(btv,i))) sfvleaves[i] = 1; 695 696 if (print) { 697 IS tbz; 698 699 cum = 0; 700 for (i=0;i<nv;i++) 701 if (sfvleaves[i]) 702 vmarks[cum++] = i; 703 704 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 705 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_local");CHKERRQ(ierr); 706 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 707 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 708 } 709 710 ierr = PetscSFReduceBegin(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 711 ierr = PetscSFReduceEnd(sfv,MPIU_INT,sfvleaves,sfvroots,MPI_SUM);CHKERRQ(ierr); 712 ierr = PetscSFBcastBegin(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 713 ierr = PetscSFBcastEnd(sfv,MPIU_INT,sfvroots,sfvleaves);CHKERRQ(ierr); 714 715 /* Zero rows of lGt corresponding to identified corners 716 and interior nodal dofs */ 717 cum = 0; 718 for (i=0;i<nv;i++) { 719 if (sfvleaves[i]) { 720 vmarks[cum++] = i; 721 ierr = PetscBTSet(btv,i);CHKERRQ(ierr); 722 } 723 if (!PetscBTLookup(btvi,i)) vmarks[cum++] = i; 724 } 725 ierr = PetscBTDestroy(&btvi);CHKERRQ(ierr); 726 if (print) { 727 IS tbz; 728 729 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,vmarks,PETSC_COPY_VALUES,&tbz);CHKERRQ(ierr); 730 ierr = PetscObjectSetName((PetscObject)tbz,"corners_to_be_zeroed_with_interior");CHKERRQ(ierr); 731 ierr = ISView(tbz,NULL);CHKERRQ(ierr); 732 ierr = ISDestroy(&tbz);CHKERRQ(ierr); 733 } 734 ierr = MatZeroRows(lGt,cum,vmarks,0.,NULL,NULL);CHKERRQ(ierr); 735 ierr = PetscFree(vmarks);CHKERRQ(ierr); 736 ierr = PetscSFDestroy(&sfv);CHKERRQ(ierr); 737 ierr = PetscFree2(sfvleaves,sfvroots);CHKERRQ(ierr); 738 739 /* Recompute G */ 740 ierr = MatDestroy(&lG);CHKERRQ(ierr); 741 ierr = MatTranspose(lGt,MAT_INITIAL_MATRIX,&lG);CHKERRQ(ierr); 742 if (print) { 743 ierr = PetscObjectSetName((PetscObject)lG,"used_lG");CHKERRQ(ierr); 744 ierr = MatView(lG,NULL);CHKERRQ(ierr); 745 ierr = PetscObjectSetName((PetscObject)lGt,"used_lGt");CHKERRQ(ierr); 746 ierr = MatView(lGt,NULL);CHKERRQ(ierr); 747 } 748 749 /* Get primal dofs (if any) */ 750 cum = 0; 751 for (i=0;i<ne;i++) { 752 if (PetscUnlikely(PetscBTLookup(bte,i))) marks[cum++] = i; 753 } 754 if (fl2g) { 755 ierr = ISLocalToGlobalMappingApply(fl2g,cum,marks,marks);CHKERRQ(ierr); 756 } 757 ierr = ISCreateGeneral(comm,cum,marks,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 758 if (print) { 759 ierr = PetscObjectSetName((PetscObject)primals,"prescribed_primal_dofs");CHKERRQ(ierr); 760 ierr = ISView(primals,NULL);CHKERRQ(ierr); 761 } 762 ierr = PetscBTDestroy(&bte);CHKERRQ(ierr); 763 /* TODO: what if the user passed in some of them ? */ 764 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 765 ierr = ISDestroy(&primals);CHKERRQ(ierr); 766 767 /* Compute edge connectivity */ 768 ierr = PetscObjectSetOptionsPrefix((PetscObject)lG,"econn_");CHKERRQ(ierr); 769 ierr = MatMatMultSymbolic(lG,lGt,PETSC_DEFAULT,&conn);CHKERRQ(ierr); 770 ierr = MatGetRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 771 if (fl2g) { 772 PetscBT btf; 773 PetscInt *iia,*jja,*iiu,*jju; 774 PetscBool rest = PETSC_FALSE,free = PETSC_FALSE; 775 776 /* create CSR for all local dofs */ 777 ierr = PetscMalloc1(n+1,&iia);CHKERRQ(ierr); 778 if (pcbddc->mat_graph->nvtxs_csr) { /* the user has passed in a CSR graph */ 779 if (pcbddc->mat_graph->nvtxs_csr != n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_USER,"Invalid size of CSR graph %d. Should be %d\n",pcbddc->mat_graph->nvtxs_csr,n); 780 iiu = pcbddc->mat_graph->xadj; 781 jju = pcbddc->mat_graph->adjncy; 782 } else if (pcbddc->use_local_adj) { 783 rest = PETSC_TRUE; 784 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 785 } else { 786 free = PETSC_TRUE; 787 ierr = PetscMalloc2(n+1,&iiu,n,&jju);CHKERRQ(ierr); 788 iiu[0] = 0; 789 for (i=0;i<n;i++) { 790 iiu[i+1] = i+1; 791 jju[i] = -1; 792 } 793 } 794 795 /* import sizes of CSR */ 796 iia[0] = 0; 797 for (i=0;i<n;i++) iia[i+1] = iiu[i+1]-iiu[i]; 798 799 /* overwrite entries corresponding to the Nedelec field */ 800 ierr = PetscBTCreate(n,&btf);CHKERRQ(ierr); 801 ierr = ISGetIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 802 for (i=0;i<ne;i++) { 803 ierr = PetscBTSet(btf,idxs[i]);CHKERRQ(ierr); 804 iia[idxs[i]+1] = ii[i+1]-ii[i]; 805 } 806 807 /* iia in CSR */ 808 for (i=0;i<n;i++) iia[i+1] += iia[i]; 809 810 /* jja in CSR */ 811 ierr = PetscMalloc1(iia[n],&jja);CHKERRQ(ierr); 812 for (i=0;i<n;i++) 813 if (!PetscBTLookup(btf,i)) 814 for (j=0;j<iiu[i+1]-iiu[i];j++) 815 jja[iia[i]+j] = jju[iiu[i]+j]; 816 817 /* map edge dofs connectivity */ 818 if (jj) { 819 ierr = ISLocalToGlobalMappingApply(fl2g,ii[ne],jj,(PetscInt *)jj);CHKERRQ(ierr); 820 for (i=0;i<ne;i++) { 821 PetscInt e = idxs[i]; 822 for (j=0;j<ii[i+1]-ii[i];j++) jja[iia[e]+j] = jj[ii[i]+j]; 823 } 824 } 825 ierr = ISRestoreIndices(nedfieldlocal,&idxs);CHKERRQ(ierr); 826 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,iia,jja,PETSC_OWN_POINTER);CHKERRQ(ierr); 827 if (rest) { 828 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&i,(const PetscInt**)&iiu,(const PetscInt**)&jju,&done);CHKERRQ(ierr); 829 } 830 if (free) { 831 ierr = PetscFree2(iiu,jju);CHKERRQ(ierr); 832 } 833 ierr = PetscBTDestroy(&btf);CHKERRQ(ierr); 834 } else { 835 ierr = PCBDDCSetLocalAdjacencyGraph(pc,n,ii,jj,PETSC_USE_POINTER);CHKERRQ(ierr); 836 } 837 838 /* Analyze interface for edge dofs */ 839 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 840 pcbddc->mat_graph->twodim = PETSC_FALSE; 841 842 /* Get coarse edges in the edge space */ 843 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 844 ierr = MatRestoreRowIJ(conn,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 845 846 if (fl2g) { 847 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 848 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 849 for (i=0;i<nee;i++) { 850 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 851 } 852 } else { 853 eedges = alleedges; 854 primals = allprimals; 855 } 856 857 /* Mark fine edge dofs with their coarse edge id */ 858 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 859 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 860 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 861 for (i=0;i<cum;i++) marks[idxs[i]] = nee+1; 862 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 863 if (print) { 864 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs");CHKERRQ(ierr); 865 ierr = ISView(primals,NULL);CHKERRQ(ierr); 866 } 867 868 maxsize = 0; 869 for (i=0;i<nee;i++) { 870 PetscInt size,mark = i+1; 871 872 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 873 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 874 for (j=0;j<size;j++) marks[idxs[j]] = mark; 875 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 876 maxsize = PetscMax(maxsize,size); 877 } 878 879 /* Find coarse edge endpoints */ 880 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 881 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 882 for (i=0;i<nee;i++) { 883 PetscInt mark = i+1,size; 884 885 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 886 if (!size && nedfieldlocal) continue; 887 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 888 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 889 if (print) { 890 PetscPrintf(PETSC_COMM_SELF,"ENDPOINTS ANALYSIS EDGE %d\n",i); 891 ISView(eedges[i],NULL); 892 } 893 for (j=0;j<size;j++) { 894 PetscInt k, ee = idxs[j]; 895 if (print) PetscPrintf(PETSC_COMM_SELF," idx %d\n",ee); 896 for (k=ii[ee];k<ii[ee+1];k++) { 897 if (print) PetscPrintf(PETSC_COMM_SELF," inspect %d\n",jj[k]); 898 if (PetscBTLookup(btv,jj[k])) { 899 if (print) PetscPrintf(PETSC_COMM_SELF," corner found (already set) %d\n",jj[k]); 900 } else if (PetscBTLookup(btvcand,jj[k])) { /* is it ok? */ 901 PetscInt k2; 902 PetscBool corner = PETSC_FALSE; 903 for (k2 = iit[jj[k]];k2 < iit[jj[k]+1];k2++) { 904 if (print) PetscPrintf(PETSC_COMM_SELF," INSPECTING %d: mark %d (ref mark %d), boundary %d\n",jjt[k2],marks[jjt[k2]],mark,!!PetscBTLookup(btb,jjt[k2])); 905 /* it's a corner if either is connected with an edge dof belonging to a different cc or 906 if the edge dof lie on the natural part of the boundary */ 907 if ((marks[jjt[k2]] && marks[jjt[k2]] != mark) || (!marks[jjt[k2]] && PetscBTLookup(btb,jjt[k2]))) { 908 corner = PETSC_TRUE; 909 break; 910 } 911 } 912 if (corner) { /* found the nodal dof corresponding to the endpoint of the edge */ 913 if (print) PetscPrintf(PETSC_COMM_SELF," corner found %d\n",jj[k]); 914 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 915 } else { 916 if (print) PetscPrintf(PETSC_COMM_SELF," no corners found\n"); 917 } 918 } 919 } 920 } 921 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 922 } 923 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 924 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 925 ierr = PetscBTDestroy(&btb);CHKERRQ(ierr); 926 927 /* Reset marked primal dofs */ 928 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 929 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 930 for (i=0;i<cum;i++) marks[idxs[i]] = 0; 931 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 932 933 /* Now use the initial lG */ 934 ierr = MatDestroy(&lG);CHKERRQ(ierr); 935 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 936 lG = lGinit; 937 ierr = MatTranspose(lG,MAT_INITIAL_MATRIX,&lGt);CHKERRQ(ierr); 938 939 /* Compute extended cols indices */ 940 ierr = PetscBTCreate(nv,&btvc);CHKERRQ(ierr); 941 ierr = PetscBTCreate(nee,&bter);CHKERRQ(ierr); 942 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 943 ierr = MatSeqAIJGetMaxRowNonzeros(lG,&i);CHKERRQ(ierr); 944 i *= maxsize; 945 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 946 ierr = PetscMalloc2(i,&extrow,i,&gidxs);CHKERRQ(ierr); 947 eerr = PETSC_FALSE; 948 for (i=0;i<nee;i++) { 949 PetscInt size,found = 0; 950 951 cum = 0; 952 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 953 if (!size && nedfieldlocal) continue; 954 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 955 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 956 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 957 for (j=0;j<size;j++) { 958 PetscInt k,ee = idxs[j]; 959 for (k=ii[ee];k<ii[ee+1];k++) { 960 PetscInt vv = jj[k]; 961 if (!PetscBTLookup(btv,vv)) extrow[cum++] = vv; 962 else if (!PetscBTLookupSet(btvc,vv)) found++; 963 } 964 } 965 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 966 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 967 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 968 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 969 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 970 /* it may happen that endpoints are not defined at this point 971 if it is the case, mark this edge for a second pass */ 972 if (cum != size -1 || found != 2) { 973 ierr = PetscBTSet(bter,i);CHKERRQ(ierr); 974 if (print) { 975 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge");CHKERRQ(ierr); 976 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 977 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol");CHKERRQ(ierr); 978 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 979 } 980 eerr = PETSC_TRUE; 981 } 982 } 983 /* if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL FIRST PASS"); */ 984 ierr = MPIU_Allreduce(&eerr,&done,1,MPIU_BOOL,MPI_LOR,comm);CHKERRQ(ierr); 985 if (done) { 986 PetscInt *newprimals; 987 988 ierr = PetscMalloc1(ne,&newprimals);CHKERRQ(ierr); 989 ierr = ISGetLocalSize(primals,&cum);CHKERRQ(ierr); 990 ierr = ISGetIndices(primals,&idxs);CHKERRQ(ierr); 991 ierr = PetscMemcpy(newprimals,idxs,cum*sizeof(PetscInt));CHKERRQ(ierr); 992 ierr = ISRestoreIndices(primals,&idxs);CHKERRQ(ierr); 993 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 994 if (print) PetscPrintf(PETSC_COMM_SELF,"DOING SECOND PASS (eerr %d)\n",eerr); 995 for (i=0;i<nee;i++) { 996 PetscBool has_candidates = PETSC_FALSE; 997 if (PetscBTLookup(bter,i)) { 998 PetscInt size,mark = i+1; 999 1000 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1001 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1002 /* for (j=0;j<size;j++) newprimals[cum++] = idxs[j]; */ 1003 for (j=0;j<size;j++) { 1004 PetscInt k,ee = idxs[j]; 1005 if (print) PetscPrintf(PETSC_COMM_SELF,"Inspecting edge dof %d [%d %d)\n",ee,ii[ee],ii[ee+1]); 1006 for (k=ii[ee];k<ii[ee+1];k++) { 1007 /* set all candidates located on the edge as corners */ 1008 if (PetscBTLookup(btvcand,jj[k])) { 1009 PetscInt k2,vv = jj[k]; 1010 has_candidates = PETSC_TRUE; 1011 if (print) PetscPrintf(PETSC_COMM_SELF," Candidate set to vertex %d\n",vv); 1012 ierr = PetscBTSet(btv,vv);CHKERRQ(ierr); 1013 /* set all edge dofs connected to candidate as primals */ 1014 for (k2=iit[vv];k2<iit[vv+1];k2++) { 1015 if (marks[jjt[k2]] == mark) { 1016 PetscInt k3,ee2 = jjt[k2]; 1017 if (print) PetscPrintf(PETSC_COMM_SELF," Connected edge dof set to primal %d\n",ee2); 1018 newprimals[cum++] = ee2; 1019 /* finally set the new corners */ 1020 for (k3=ii[ee2];k3<ii[ee2+1];k3++) { 1021 if (print) PetscPrintf(PETSC_COMM_SELF," Connected nodal dof set to vertex %d\n",jj[k3]); 1022 ierr = PetscBTSet(btv,jj[k3]);CHKERRQ(ierr); 1023 } 1024 } 1025 } 1026 } else { 1027 if (print) PetscPrintf(PETSC_COMM_SELF," Not a candidate vertex %d\n",jj[k]); 1028 } 1029 } 1030 } 1031 if (!has_candidates) { /* circular edge */ 1032 PetscInt k, ee = idxs[0],*tmarks; 1033 1034 ierr = PetscCalloc1(ne,&tmarks);CHKERRQ(ierr); 1035 if (print) PetscPrintf(PETSC_COMM_SELF," Circular edge %d\n",i); 1036 for (k=ii[ee];k<ii[ee+1];k++) { 1037 PetscInt k2; 1038 if (print) PetscPrintf(PETSC_COMM_SELF," Set to corner %d\n",jj[k]); 1039 ierr = PetscBTSet(btv,jj[k]);CHKERRQ(ierr); 1040 for (k2=iit[jj[k]];k2<iit[jj[k]+1];k2++) tmarks[jjt[k2]]++; 1041 } 1042 for (j=0;j<size;j++) { 1043 if (tmarks[idxs[j]] > 1) { 1044 if (print) PetscPrintf(PETSC_COMM_SELF," Edge dof set to primal %d\n",idxs[j]); 1045 newprimals[cum++] = idxs[j]; 1046 } 1047 } 1048 ierr = PetscFree(tmarks);CHKERRQ(ierr); 1049 } 1050 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1051 } 1052 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1053 } 1054 ierr = PetscFree(extcols);CHKERRQ(ierr); 1055 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&iit,&jjt,&done);CHKERRQ(ierr); 1056 ierr = PetscSortRemoveDupsInt(&cum,newprimals);CHKERRQ(ierr); 1057 if (fl2g) { 1058 ierr = ISLocalToGlobalMappingApply(fl2g,cum,newprimals,newprimals);CHKERRQ(ierr); 1059 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1060 for (i=0;i<nee;i++) { 1061 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1062 } 1063 ierr = PetscFree(eedges);CHKERRQ(ierr); 1064 } 1065 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1066 ierr = ISCreateGeneral(comm,cum,newprimals,PETSC_COPY_VALUES,&primals);CHKERRQ(ierr); 1067 ierr = PetscFree(newprimals);CHKERRQ(ierr); 1068 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primals);CHKERRQ(ierr); 1069 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1070 ierr = PCBDDCAnalyzeInterface(pc);CHKERRQ(ierr); 1071 pcbddc->mat_graph->twodim = PETSC_FALSE; 1072 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1073 if (fl2g) { 1074 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,allprimals,&primals);CHKERRQ(ierr); 1075 ierr = PetscMalloc1(nee,&eedges);CHKERRQ(ierr); 1076 for (i=0;i<nee;i++) { 1077 ierr = ISGlobalToLocalMappingApplyIS(fl2g,IS_GTOLM_DROP,alleedges[i],&eedges[i]);CHKERRQ(ierr); 1078 } 1079 } else { 1080 eedges = alleedges; 1081 primals = allprimals; 1082 } 1083 ierr = PetscCalloc1(nee,&extcols);CHKERRQ(ierr); 1084 1085 /* Mark again */ 1086 ierr = PetscMemzero(marks,ne*sizeof(PetscInt));CHKERRQ(ierr); 1087 for (i=0;i<nee;i++) { 1088 PetscInt size,mark = i+1; 1089 1090 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1091 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1092 for (j=0;j<size;j++) marks[idxs[j]] = mark; 1093 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1094 } 1095 if (print) { 1096 ierr = PetscObjectSetName((PetscObject)primals,"obtained_primal_dofs_secondpass");CHKERRQ(ierr); 1097 ierr = ISView(primals,NULL);CHKERRQ(ierr); 1098 } 1099 1100 /* Recompute extended cols */ 1101 eerr = PETSC_FALSE; 1102 for (i=0;i<nee;i++) { 1103 PetscInt size; 1104 1105 cum = 0; 1106 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1107 if (!size && nedfieldlocal) continue; 1108 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1109 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1110 for (j=0;j<size;j++) { 1111 PetscInt k,ee = idxs[j]; 1112 for (k=ii[ee];k<ii[ee+1];k++) if (!PetscBTLookup(btv,jj[k])) extrow[cum++] = jj[k]; 1113 } 1114 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1115 ierr = PetscSortRemoveDupsInt(&cum,extrow);CHKERRQ(ierr); 1116 ierr = ISLocalToGlobalMappingApply(vl2g,cum,extrow,gidxs);CHKERRQ(ierr); 1117 ierr = PetscSortIntWithArray(cum,gidxs,extrow);CHKERRQ(ierr); 1118 ierr = ISCreateGeneral(PETSC_COMM_SELF,cum,extrow,PETSC_COPY_VALUES,&extcols[i]);CHKERRQ(ierr); 1119 if (cum != size -1) { 1120 if (print) { 1121 ierr = PetscObjectSetName((PetscObject)eedges[i],"error_edge_secondpass");CHKERRQ(ierr); 1122 ierr = ISView(eedges[i],NULL);CHKERRQ(ierr); 1123 ierr = PetscObjectSetName((PetscObject)extcols[i],"error_extcol_secondpass");CHKERRQ(ierr); 1124 ierr = ISView(extcols[i],NULL);CHKERRQ(ierr); 1125 } 1126 eerr = PETSC_TRUE; 1127 } 1128 } 1129 } 1130 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1131 ierr = PetscFree2(extrow,gidxs);CHKERRQ(ierr); 1132 ierr = PetscBTDestroy(&bter);CHKERRQ(ierr); 1133 if (print) { ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,5,PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); } 1134 /* an error should not occur at this point */ 1135 if (eerr) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected SIZE OF EDGE > EXTCOL SECOND PASS"); 1136 1137 /* Check the number of endpoints */ 1138 ierr = MatGetRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1139 ierr = PetscMalloc1(2*nee,&corners);CHKERRQ(ierr); 1140 ierr = PetscMalloc1(nee,&cedges);CHKERRQ(ierr); 1141 for (i=0;i<nee;i++) { 1142 PetscInt size, found = 0, gc[2]; 1143 1144 /* init with defaults */ 1145 cedges[i] = corners[i*2] = corners[i*2+1] = -1; 1146 ierr = ISGetLocalSize(eedges[i],&size);CHKERRQ(ierr); 1147 if (!size && nedfieldlocal) continue; 1148 if (!size) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Unexpected zero sized edge %d",i); 1149 ierr = ISGetIndices(eedges[i],&idxs);CHKERRQ(ierr); 1150 ierr = PetscBTMemzero(nv,btvc);CHKERRQ(ierr); 1151 for (j=0;j<size;j++) { 1152 PetscInt k,ee = idxs[j]; 1153 for (k=ii[ee];k<ii[ee+1];k++) { 1154 PetscInt vv = jj[k]; 1155 if (PetscBTLookup(btv,vv) && !PetscBTLookupSet(btvc,vv)) { 1156 if (found == 2) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found more then two corners for edge %d\n",i); 1157 corners[i*2+found++] = vv; 1158 } 1159 } 1160 } 1161 if (found != 2) { 1162 PetscInt e; 1163 if (fl2g) { 1164 ierr = ISLocalToGlobalMappingApply(fl2g,1,idxs,&e);CHKERRQ(ierr); 1165 } else { 1166 e = idxs[0]; 1167 } 1168 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Found %d corners for edge %d (astart %d, estart %d)\n",found,i,e,idxs[0]); 1169 } 1170 1171 /* get primal dof index on this coarse edge */ 1172 ierr = ISLocalToGlobalMappingApply(vl2g,2,corners+2*i,gc);CHKERRQ(ierr); 1173 if (gc[0] > gc[1]) { 1174 PetscInt swap = corners[2*i]; 1175 corners[2*i] = corners[2*i+1]; 1176 corners[2*i+1] = swap; 1177 } 1178 cedges[i] = idxs[size-1]; 1179 ierr = ISRestoreIndices(eedges[i],&idxs);CHKERRQ(ierr); 1180 if (print) PetscPrintf(PETSC_COMM_SELF,"EDGE %d: ce %d, corners (%d,%d)\n",i,cedges[i],corners[2*i],corners[2*i+1]); 1181 } 1182 ierr = MatRestoreRowIJ(lG,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1183 ierr = PetscBTDestroy(&btvc);CHKERRQ(ierr); 1184 1185 #if defined(PETSC_USE_DEBUG) 1186 /* Inspects columns of lG (rows of lGt) and make sure the change of basis will 1187 not interfere with neighbouring coarse edges */ 1188 ierr = PetscMalloc1(nee+1,&emarks);CHKERRQ(ierr); 1189 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1190 for (i=0;i<nv;i++) { 1191 PetscInt emax = 0,eemax = 0; 1192 1193 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1194 ierr = PetscMemzero(emarks,(nee+1)*sizeof(PetscInt));CHKERRQ(ierr); 1195 for (j=ii[i];j<ii[i+1];j++) emarks[marks[jj[j]]]++; 1196 for (j=1;j<nee+1;j++) { 1197 if (emax < emarks[j]) { 1198 emax = emarks[j]; 1199 eemax = j; 1200 } 1201 } 1202 /* not relevant for edges */ 1203 if (!eemax) continue; 1204 1205 for (j=ii[i];j<ii[i+1];j++) { 1206 if (marks[jj[j]] && marks[jj[j]] != eemax) { 1207 SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_SUP,"Found 2 coarse edges (id %d and %d) connected through the %d nodal dof at edge dof %d\n",marks[jj[j]]-1,eemax,i,jj[j]); 1208 } 1209 } 1210 } 1211 ierr = PetscFree(emarks);CHKERRQ(ierr); 1212 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1213 #endif 1214 1215 /* Compute extended rows indices for edge blocks of the change of basis */ 1216 ierr = MatGetRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1217 ierr = MatSeqAIJGetMaxRowNonzeros(lGt,&extmem);CHKERRQ(ierr); 1218 extmem *= maxsize; 1219 ierr = PetscMalloc1(extmem*nee,&extrow);CHKERRQ(ierr); 1220 ierr = PetscMalloc1(nee,&extrows);CHKERRQ(ierr); 1221 ierr = PetscCalloc1(nee,&extrowcum);CHKERRQ(ierr); 1222 for (i=0;i<nv;i++) { 1223 PetscInt mark = 0,size,start; 1224 1225 if (ii[i+1]==ii[i] || PetscBTLookup(btv,i)) continue; 1226 for (j=ii[i];j<ii[i+1];j++) 1227 if (marks[jj[j]] && !mark) 1228 mark = marks[jj[j]]; 1229 1230 /* not relevant */ 1231 if (!mark) continue; 1232 1233 /* import extended row */ 1234 mark--; 1235 start = mark*extmem+extrowcum[mark]; 1236 size = ii[i+1]-ii[i]; 1237 if (extrowcum[mark] + size > extmem) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Not enough memory allocated %d > %d",extrowcum[mark] + size,extmem); 1238 ierr = PetscMemcpy(extrow+start,jj+ii[i],size*sizeof(PetscInt));CHKERRQ(ierr); 1239 extrowcum[mark] += size; 1240 } 1241 ierr = MatRestoreRowIJ(lGt,0,PETSC_FALSE,PETSC_FALSE,&i,&ii,&jj,&done);CHKERRQ(ierr); 1242 ierr = MatDestroy(&lGt);CHKERRQ(ierr); 1243 ierr = PetscFree(marks);CHKERRQ(ierr); 1244 1245 /* Compress extrows */ 1246 cum = 0; 1247 for (i=0;i<nee;i++) { 1248 PetscInt size = extrowcum[i],*start = extrow + i*extmem; 1249 ierr = PetscSortRemoveDupsInt(&size,start);CHKERRQ(ierr); 1250 ierr = ISCreateGeneral(PETSC_COMM_SELF,size,start,PETSC_USE_POINTER,&extrows[i]);CHKERRQ(ierr); 1251 cum = PetscMax(cum,size); 1252 } 1253 ierr = PetscFree(extrowcum);CHKERRQ(ierr); 1254 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 1255 ierr = PetscBTDestroy(&btvcand);CHKERRQ(ierr); 1256 1257 /* Workspace for lapack inner calls and VecSetValues */ 1258 ierr = PetscMalloc2((5+cum+maxsize)*maxsize,&work,maxsize,&rwork);CHKERRQ(ierr); 1259 1260 /* Create change of basis matrix (preallocation can be improved) */ 1261 ierr = MatCreate(comm,&T);CHKERRQ(ierr); 1262 ierr = MatSetSizes(T,pc->pmat->rmap->n,pc->pmat->rmap->n, 1263 pc->pmat->rmap->N,pc->pmat->rmap->N);CHKERRQ(ierr); 1264 ierr = MatSetType(T,MATAIJ);CHKERRQ(ierr); 1265 ierr = MatSeqAIJSetPreallocation(T,10,NULL);CHKERRQ(ierr); 1266 ierr = MatMPIAIJSetPreallocation(T,10,NULL,10,NULL);CHKERRQ(ierr); 1267 ierr = MatSetLocalToGlobalMapping(T,al2g,al2g);CHKERRQ(ierr); 1268 ierr = MatSetOption(T,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 1269 ierr = MatSetOption(T,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 1270 ierr = ISLocalToGlobalMappingDestroy(&al2g);CHKERRQ(ierr); 1271 1272 /* Defaults to identity */ 1273 ierr = MatCreateVecs(pc->pmat,&tvec,NULL);CHKERRQ(ierr); 1274 ierr = VecSet(tvec,1.0);CHKERRQ(ierr); 1275 ierr = MatDiagonalSet(T,tvec,INSERT_VALUES);CHKERRQ(ierr); 1276 ierr = VecDestroy(&tvec);CHKERRQ(ierr); 1277 1278 /* Create discrete gradient for the coarser level if needed */ 1279 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 1280 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 1281 if (pcbddc->current_level < pcbddc->max_levels) { 1282 ISLocalToGlobalMapping cel2g,cvl2g; 1283 IS wis,gwis; 1284 PetscInt cnv,cne; 1285 1286 ierr = ISCreateGeneral(comm,nee,cedges,PETSC_COPY_VALUES,&wis);CHKERRQ(ierr); 1287 if (fl2g) { 1288 ierr = ISLocalToGlobalMappingApplyIS(fl2g,wis,&pcbddc->nedclocal);CHKERRQ(ierr); 1289 } else { 1290 ierr = PetscObjectReference((PetscObject)wis);CHKERRQ(ierr); 1291 pcbddc->nedclocal = wis; 1292 } 1293 ierr = ISLocalToGlobalMappingApplyIS(el2g,wis,&gwis);CHKERRQ(ierr); 1294 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1295 ierr = ISRenumber(gwis,NULL,&cne,&wis);CHKERRQ(ierr); 1296 ierr = ISLocalToGlobalMappingCreateIS(wis,&cel2g);CHKERRQ(ierr); 1297 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1298 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1299 1300 ierr = ISCreateGeneral(comm,2*nee,corners,PETSC_USE_POINTER,&wis);CHKERRQ(ierr); 1301 ierr = ISLocalToGlobalMappingApplyIS(vl2g,wis,&gwis);CHKERRQ(ierr); 1302 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1303 ierr = ISRenumber(gwis,NULL,&cnv,&wis);CHKERRQ(ierr); 1304 ierr = ISLocalToGlobalMappingCreateIS(wis,&cvl2g);CHKERRQ(ierr); 1305 ierr = ISDestroy(&wis);CHKERRQ(ierr); 1306 ierr = ISDestroy(&gwis);CHKERRQ(ierr); 1307 1308 ierr = MatCreate(comm,&pcbddc->nedcG);CHKERRQ(ierr); 1309 ierr = MatSetSizes(pcbddc->nedcG,PETSC_DECIDE,PETSC_DECIDE,cne,cnv);CHKERRQ(ierr); 1310 ierr = MatSetType(pcbddc->nedcG,MATAIJ);CHKERRQ(ierr); 1311 ierr = MatSeqAIJSetPreallocation(pcbddc->nedcG,2,NULL);CHKERRQ(ierr); 1312 ierr = MatMPIAIJSetPreallocation(pcbddc->nedcG,2,NULL,2,NULL);CHKERRQ(ierr); 1313 ierr = MatSetLocalToGlobalMapping(pcbddc->nedcG,cel2g,cvl2g);CHKERRQ(ierr); 1314 ierr = ISLocalToGlobalMappingDestroy(&cel2g);CHKERRQ(ierr); 1315 ierr = ISLocalToGlobalMappingDestroy(&cvl2g);CHKERRQ(ierr); 1316 } 1317 ierr = ISLocalToGlobalMappingDestroy(&vl2g);CHKERRQ(ierr); 1318 1319 #if defined(PRINT_GDET) 1320 inc = 0; 1321 lev = pcbddc->current_level; 1322 #endif 1323 1324 /* Insert values in the change of basis matrix */ 1325 for (i=0;i<nee;i++) { 1326 Mat Gins = NULL, GKins = NULL; 1327 IS cornersis = NULL; 1328 PetscScalar cvals[2]; 1329 1330 if (pcbddc->nedcG) { 1331 ierr = ISCreateGeneral(PETSC_COMM_SELF,2,corners+2*i,PETSC_USE_POINTER,&cornersis);CHKERRQ(ierr); 1332 } 1333 ierr = PCBDDCComputeNedelecChangeEdge(lG,eedges[i],extrows[i],extcols[i],cornersis,&Gins,&GKins,cvals,work,rwork);CHKERRQ(ierr); 1334 if (Gins && GKins) { 1335 PetscScalar *data; 1336 const PetscInt *rows,*cols; 1337 PetscInt nrh,nch,nrc,ncc; 1338 1339 ierr = ISGetIndices(eedges[i],&cols);CHKERRQ(ierr); 1340 /* H1 */ 1341 ierr = ISGetIndices(extrows[i],&rows);CHKERRQ(ierr); 1342 ierr = MatGetSize(Gins,&nrh,&nch);CHKERRQ(ierr); 1343 ierr = MatDenseGetArray(Gins,&data);CHKERRQ(ierr); 1344 ierr = MatSetValuesLocal(T,nrh,rows,nch,cols,data,INSERT_VALUES);CHKERRQ(ierr); 1345 ierr = MatDenseRestoreArray(Gins,&data);CHKERRQ(ierr); 1346 ierr = ISRestoreIndices(extrows[i],&rows);CHKERRQ(ierr); 1347 /* complement */ 1348 ierr = MatGetSize(GKins,&nrc,&ncc);CHKERRQ(ierr); 1349 if (!ncc) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Constant function has not been generated for coarse edge %d",i); 1350 if (ncc + nch != nrc) SETERRQ4(PETSC_COMM_SELF,PETSC_ERR_PLIB,"The sum of the number of columns of GKins %d and Gins %d does not match %d for coarse edge %d",ncc,nch,nrc,i); 1351 if (ncc != 1 && pcbddc->nedcG) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot generate the coarse discrete gradient for coarse edge %d with ncc %d",i,ncc); 1352 ierr = MatDenseGetArray(GKins,&data);CHKERRQ(ierr); 1353 ierr = MatSetValuesLocal(T,nrc,cols,ncc,cols+nch,data,INSERT_VALUES);CHKERRQ(ierr); 1354 ierr = MatDenseRestoreArray(GKins,&data);CHKERRQ(ierr); 1355 1356 /* coarse discrete gradient */ 1357 if (pcbddc->nedcG) { 1358 PetscInt cols[2]; 1359 1360 cols[0] = 2*i; 1361 cols[1] = 2*i+1; 1362 ierr = MatSetValuesLocal(pcbddc->nedcG,1,&i,2,cols,cvals,INSERT_VALUES);CHKERRQ(ierr); 1363 } 1364 ierr = ISRestoreIndices(eedges[i],&cols);CHKERRQ(ierr); 1365 } 1366 ierr = ISDestroy(&extrows[i]);CHKERRQ(ierr); 1367 ierr = ISDestroy(&extcols[i]);CHKERRQ(ierr); 1368 ierr = ISDestroy(&cornersis);CHKERRQ(ierr); 1369 ierr = MatDestroy(&Gins);CHKERRQ(ierr); 1370 ierr = MatDestroy(&GKins);CHKERRQ(ierr); 1371 } 1372 ierr = ISLocalToGlobalMappingDestroy(&el2g);CHKERRQ(ierr); 1373 1374 /* Start assembling */ 1375 ierr = MatAssemblyBegin(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1376 if (pcbddc->nedcG) { 1377 ierr = MatAssemblyBegin(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1378 } 1379 1380 /* Free */ 1381 if (fl2g) { 1382 ierr = ISDestroy(&primals);CHKERRQ(ierr); 1383 for (i=0;i<nee;i++) { 1384 ierr = ISDestroy(&eedges[i]);CHKERRQ(ierr); 1385 } 1386 ierr = PetscFree(eedges);CHKERRQ(ierr); 1387 } 1388 1389 /* hack mat_graph with primal dofs on the coarse edges */ 1390 { 1391 PCBDDCGraph graph = pcbddc->mat_graph; 1392 PetscInt *oqueue = graph->queue; 1393 PetscInt *ocptr = graph->cptr; 1394 PetscInt ncc,*idxs; 1395 1396 /* find first primal edge */ 1397 if (pcbddc->nedclocal) { 1398 ierr = ISGetIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1399 } else { 1400 if (fl2g) { 1401 ierr = ISLocalToGlobalMappingApply(fl2g,nee,cedges,cedges);CHKERRQ(ierr); 1402 } 1403 idxs = cedges; 1404 } 1405 cum = 0; 1406 while (cum < nee && cedges[cum] < 0) cum++; 1407 1408 /* adapt connected components */ 1409 ierr = PetscMalloc2(graph->nvtxs+1,&graph->cptr,ocptr[graph->ncc],&graph->queue);CHKERRQ(ierr); 1410 graph->cptr[0] = 0; 1411 for (i=0,ncc=0;i<graph->ncc;i++) { 1412 PetscInt lc = ocptr[i+1]-ocptr[i]; 1413 if (cum != nee && oqueue[ocptr[i+1]-1] == cedges[cum]) { /* this cc has a primal dof */ 1414 graph->cptr[ncc+1] = graph->cptr[ncc]+1; 1415 graph->queue[graph->cptr[ncc]] = cedges[cum]; 1416 ncc++; 1417 lc--; 1418 cum++; 1419 while (cum < nee && cedges[cum] < 0) cum++; 1420 } 1421 graph->cptr[ncc+1] = graph->cptr[ncc] + lc; 1422 for (j=0;j<lc;j++) graph->queue[graph->cptr[ncc]+j] = oqueue[ocptr[i]+j]; 1423 ncc++; 1424 } 1425 graph->ncc = ncc; 1426 if (pcbddc->nedclocal) { 1427 ierr = ISRestoreIndices(pcbddc->nedclocal,(const PetscInt**)&idxs);CHKERRQ(ierr); 1428 } 1429 ierr = PetscFree2(ocptr,oqueue);CHKERRQ(ierr); 1430 } 1431 ierr = ISLocalToGlobalMappingDestroy(&fl2g);CHKERRQ(ierr); 1432 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,&nee,&alleedges,&allprimals);CHKERRQ(ierr); 1433 ierr = PCBDDCGraphResetCSR(pcbddc->mat_graph);CHKERRQ(ierr); 1434 ierr = MatDestroy(&conn);CHKERRQ(ierr); 1435 1436 ierr = ISDestroy(&nedfieldlocal);CHKERRQ(ierr); 1437 ierr = PetscFree(extrow);CHKERRQ(ierr); 1438 ierr = PetscFree2(work,rwork);CHKERRQ(ierr); 1439 ierr = PetscFree(corners);CHKERRQ(ierr); 1440 ierr = PetscFree(cedges);CHKERRQ(ierr); 1441 ierr = PetscFree(extrows);CHKERRQ(ierr); 1442 ierr = PetscFree(extcols);CHKERRQ(ierr); 1443 ierr = MatDestroy(&lG);CHKERRQ(ierr); 1444 1445 /* Complete assembling */ 1446 ierr = MatAssemblyEnd(T,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1447 if (pcbddc->nedcG) { 1448 ierr = MatAssemblyEnd(pcbddc->nedcG,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 1449 #if 0 1450 ierr = PetscObjectSetName((PetscObject)pcbddc->nedcG,"coarse_G");CHKERRQ(ierr); 1451 ierr = MatView(pcbddc->nedcG,NULL);CHKERRQ(ierr); 1452 #endif 1453 } 1454 1455 /* set change of basis */ 1456 ierr = PCBDDCSetChangeOfBasisMat(pc,T,singular);CHKERRQ(ierr); 1457 ierr = MatDestroy(&T);CHKERRQ(ierr); 1458 1459 PetscFunctionReturn(0); 1460 } 1461 1462 /* the near-null space of BDDC carries information on quadrature weights, 1463 and these can be collinear -> so cheat with MatNullSpaceCreate 1464 and create a suitable set of basis vectors first */ 1465 PetscErrorCode PCBDDCNullSpaceCreate(MPI_Comm comm, PetscBool has_const, PetscInt nvecs, Vec quad_vecs[], MatNullSpace *nnsp) 1466 { 1467 PetscErrorCode ierr; 1468 PetscInt i; 1469 1470 PetscFunctionBegin; 1471 for (i=0;i<nvecs;i++) { 1472 PetscInt first,last; 1473 1474 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1475 if (last-first < 2*nvecs && has_const) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not implemented"); 1476 if (i>=first && i < last) { 1477 PetscScalar *data; 1478 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1479 if (!has_const) { 1480 data[i-first] = 1.; 1481 } else { 1482 data[2*i-first] = 1./PetscSqrtReal(2.); 1483 data[2*i-first+1] = -1./PetscSqrtReal(2.); 1484 } 1485 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1486 } 1487 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1488 } 1489 ierr = MatNullSpaceCreate(comm,has_const,nvecs,quad_vecs,nnsp);CHKERRQ(ierr); 1490 for (i=0;i<nvecs;i++) { /* reset vectors */ 1491 PetscInt first,last; 1492 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1493 ierr = VecGetOwnershipRange(quad_vecs[i],&first,&last);CHKERRQ(ierr); 1494 if (i>=first && i < last) { 1495 PetscScalar *data; 1496 ierr = VecGetArray(quad_vecs[i],&data);CHKERRQ(ierr); 1497 if (!has_const) { 1498 data[i-first] = 0.; 1499 } else { 1500 data[2*i-first] = 0.; 1501 data[2*i-first+1] = 0.; 1502 } 1503 ierr = VecRestoreArray(quad_vecs[i],&data);CHKERRQ(ierr); 1504 } 1505 ierr = PetscObjectStateIncrease((PetscObject)quad_vecs[i]);CHKERRQ(ierr); 1506 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1507 } 1508 PetscFunctionReturn(0); 1509 } 1510 1511 PetscErrorCode PCBDDCComputeNoNetFlux(Mat A, Mat divudotp, PetscBool transpose, IS vl2l, PCBDDCGraph graph, MatNullSpace *nnsp) 1512 { 1513 Mat loc_divudotp; 1514 Vec p,v,vins,quad_vec,*quad_vecs; 1515 ISLocalToGlobalMapping map; 1516 IS *faces,*edges; 1517 PetscScalar *vals; 1518 const PetscScalar *array; 1519 PetscInt i,maxneighs,lmaxneighs,maxsize,nf,ne; 1520 PetscMPIInt rank; 1521 PetscErrorCode ierr; 1522 1523 PetscFunctionBegin; 1524 ierr = PCBDDCGraphGetCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1525 if (graph->twodim) { 1526 lmaxneighs = 2; 1527 } else { 1528 lmaxneighs = 1; 1529 for (i=0;i<ne;i++) { 1530 const PetscInt *idxs; 1531 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1532 lmaxneighs = PetscMax(lmaxneighs,graph->count[idxs[0]]); 1533 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1534 } 1535 lmaxneighs++; /* graph count does not include self */ 1536 } 1537 ierr = MPIU_Allreduce(&lmaxneighs,&maxneighs,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)A));CHKERRQ(ierr); 1538 maxsize = 0; 1539 for (i=0;i<ne;i++) { 1540 PetscInt nn; 1541 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1542 maxsize = PetscMax(maxsize,nn); 1543 } 1544 for (i=0;i<nf;i++) { 1545 PetscInt nn; 1546 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1547 maxsize = PetscMax(maxsize,nn); 1548 } 1549 ierr = PetscMalloc1(maxsize,&vals);CHKERRQ(ierr); 1550 /* create vectors to hold quadrature weights */ 1551 ierr = MatCreateVecs(A,&quad_vec,NULL);CHKERRQ(ierr); 1552 if (!transpose) { 1553 ierr = MatGetLocalToGlobalMapping(A,&map,NULL);CHKERRQ(ierr); 1554 } else { 1555 ierr = MatGetLocalToGlobalMapping(A,NULL,&map);CHKERRQ(ierr); 1556 } 1557 ierr = VecDuplicateVecs(quad_vec,maxneighs,&quad_vecs);CHKERRQ(ierr); 1558 ierr = VecDestroy(&quad_vec);CHKERRQ(ierr); 1559 ierr = PCBDDCNullSpaceCreate(PetscObjectComm((PetscObject)A),PETSC_FALSE,maxneighs,quad_vecs,nnsp);CHKERRQ(ierr); 1560 for (i=0;i<maxneighs;i++) { 1561 ierr = VecLockPop(quad_vecs[i]);CHKERRQ(ierr); 1562 ierr = VecSetLocalToGlobalMapping(quad_vecs[i],map);CHKERRQ(ierr); 1563 } 1564 1565 /* compute local quad vec */ 1566 ierr = MatISGetLocalMat(divudotp,&loc_divudotp);CHKERRQ(ierr); 1567 if (!transpose) { 1568 ierr = MatCreateVecs(loc_divudotp,&v,&p);CHKERRQ(ierr); 1569 } else { 1570 ierr = MatCreateVecs(loc_divudotp,&p,&v);CHKERRQ(ierr); 1571 } 1572 ierr = VecSet(p,1.);CHKERRQ(ierr); 1573 if (!transpose) { 1574 ierr = MatMultTranspose(loc_divudotp,p,v);CHKERRQ(ierr); 1575 } else { 1576 ierr = MatMult(loc_divudotp,p,v);CHKERRQ(ierr); 1577 } 1578 if (vl2l) { 1579 Mat lA; 1580 VecScatter sc; 1581 1582 ierr = MatISGetLocalMat(A,&lA);CHKERRQ(ierr); 1583 ierr = MatCreateVecs(lA,&vins,NULL);CHKERRQ(ierr); 1584 ierr = VecScatterCreate(v,vl2l,vins,NULL,&sc);CHKERRQ(ierr); 1585 ierr = VecScatterBegin(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1586 ierr = VecScatterEnd(sc,v,vins,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1587 ierr = VecScatterDestroy(&sc);CHKERRQ(ierr); 1588 } else { 1589 vins = v; 1590 } 1591 ierr = VecGetArrayRead(vins,&array);CHKERRQ(ierr); 1592 ierr = VecDestroy(&p);CHKERRQ(ierr); 1593 1594 /* insert in global quadrature vecs */ 1595 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)A),&rank);CHKERRQ(ierr); 1596 for (i=0;i<nf;i++) { 1597 const PetscInt *idxs; 1598 PetscInt idx,nn,j; 1599 1600 ierr = ISGetIndices(faces[i],&idxs);CHKERRQ(ierr); 1601 ierr = ISGetLocalSize(faces[i],&nn);CHKERRQ(ierr); 1602 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1603 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1604 idx = -(idx+1); 1605 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1606 ierr = ISRestoreIndices(faces[i],&idxs);CHKERRQ(ierr); 1607 } 1608 for (i=0;i<ne;i++) { 1609 const PetscInt *idxs; 1610 PetscInt idx,nn,j; 1611 1612 ierr = ISGetIndices(edges[i],&idxs);CHKERRQ(ierr); 1613 ierr = ISGetLocalSize(edges[i],&nn);CHKERRQ(ierr); 1614 for (j=0;j<nn;j++) vals[j] = array[idxs[j]]; 1615 ierr = PetscFindInt(rank,graph->count[idxs[0]],graph->neighbours_set[idxs[0]],&idx);CHKERRQ(ierr); 1616 idx = -(idx+1); 1617 ierr = VecSetValuesLocal(quad_vecs[idx],nn,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 1618 ierr = ISRestoreIndices(edges[i],&idxs);CHKERRQ(ierr); 1619 } 1620 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nf,&faces,&ne,&edges,NULL);CHKERRQ(ierr); 1621 ierr = VecRestoreArrayRead(vins,&array);CHKERRQ(ierr); 1622 if (vl2l) { 1623 ierr = VecDestroy(&vins);CHKERRQ(ierr); 1624 } 1625 ierr = VecDestroy(&v);CHKERRQ(ierr); 1626 ierr = PetscFree(vals);CHKERRQ(ierr); 1627 1628 /* assemble near null space */ 1629 for (i=0;i<maxneighs;i++) { 1630 ierr = VecAssemblyBegin(quad_vecs[i]);CHKERRQ(ierr); 1631 } 1632 for (i=0;i<maxneighs;i++) { 1633 ierr = VecAssemblyEnd(quad_vecs[i]);CHKERRQ(ierr); 1634 ierr = VecLockPush(quad_vecs[i]);CHKERRQ(ierr); 1635 } 1636 ierr = VecDestroyVecs(maxneighs,&quad_vecs);CHKERRQ(ierr); 1637 PetscFunctionReturn(0); 1638 } 1639 1640 PetscErrorCode PCBDDCAddPrimalVerticesLocalIS(PC pc, IS primalv) 1641 { 1642 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1643 PetscErrorCode ierr; 1644 1645 PetscFunctionBegin; 1646 if (primalv) { 1647 if (pcbddc->user_primal_vertices_local) { 1648 IS list[2], newp; 1649 1650 list[0] = primalv; 1651 list[1] = pcbddc->user_primal_vertices_local; 1652 ierr = ISConcatenate(PetscObjectComm((PetscObject)pc),2,list,&newp);CHKERRQ(ierr); 1653 ierr = ISSortRemoveDups(newp);CHKERRQ(ierr); 1654 ierr = ISDestroy(&list[1]);CHKERRQ(ierr); 1655 pcbddc->user_primal_vertices_local = newp; 1656 } else { 1657 ierr = PCBDDCSetPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1658 } 1659 } 1660 PetscFunctionReturn(0); 1661 } 1662 1663 PetscErrorCode PCBDDCComputeLocalTopologyInfo(PC pc) 1664 { 1665 PetscErrorCode ierr; 1666 Vec local,global; 1667 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1668 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 1669 PetscBool monolithic = PETSC_FALSE; 1670 1671 PetscFunctionBegin; 1672 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC topology options","PC");CHKERRQ(ierr); 1673 ierr = PetscOptionsBool("-pc_bddc_monolithic","Discard any information on dofs splitting",NULL,monolithic,&monolithic,NULL);CHKERRQ(ierr); 1674 ierr = PetscOptionsEnd();CHKERRQ(ierr); 1675 /* need to convert from global to local topology information and remove references to information in global ordering */ 1676 ierr = MatCreateVecs(pc->pmat,&global,NULL);CHKERRQ(ierr); 1677 ierr = MatCreateVecs(matis->A,&local,NULL);CHKERRQ(ierr); 1678 if (monolithic) goto boundary; 1679 1680 if (pcbddc->user_provided_isfordofs) { 1681 if (pcbddc->n_ISForDofs) { 1682 PetscInt i; 1683 ierr = PetscMalloc1(pcbddc->n_ISForDofs,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1684 for (i=0;i<pcbddc->n_ISForDofs;i++) { 1685 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->ISForDofs[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1686 ierr = ISDestroy(&pcbddc->ISForDofs[i]);CHKERRQ(ierr); 1687 } 1688 pcbddc->n_ISForDofsLocal = pcbddc->n_ISForDofs; 1689 pcbddc->n_ISForDofs = 0; 1690 ierr = PetscFree(pcbddc->ISForDofs);CHKERRQ(ierr); 1691 } 1692 } else { 1693 if (!pcbddc->n_ISForDofsLocal) { /* field split not present */ 1694 DM dm; 1695 1696 ierr = PCGetDM(pc, &dm);CHKERRQ(ierr); 1697 if (!dm) { 1698 ierr = MatGetDM(pc->pmat, &dm);CHKERRQ(ierr); 1699 } 1700 if (dm) { 1701 IS *fields; 1702 PetscInt nf,i; 1703 ierr = DMCreateFieldDecomposition(dm,&nf,NULL,&fields,NULL);CHKERRQ(ierr); 1704 ierr = PetscMalloc1(nf,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1705 for (i=0;i<nf;i++) { 1706 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,fields[i],&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1707 ierr = ISDestroy(&fields[i]);CHKERRQ(ierr); 1708 } 1709 ierr = PetscFree(fields);CHKERRQ(ierr); 1710 pcbddc->n_ISForDofsLocal = nf; 1711 } else { /* See if MATIS has fields attached by the conversion from MatNest */ 1712 PetscContainer c; 1713 1714 ierr = PetscObjectQuery((PetscObject)pc->pmat,"_convert_nest_lfields",(PetscObject*)&c);CHKERRQ(ierr); 1715 if (c) { 1716 MatISLocalFields lf; 1717 ierr = PetscContainerGetPointer(c,(void**)&lf);CHKERRQ(ierr); 1718 ierr = PCBDDCSetDofsSplittingLocal(pc,lf->nr,lf->rf);CHKERRQ(ierr); 1719 } else { /* fallback, create the default fields if bs > 1 */ 1720 PetscInt i, n = matis->A->rmap->n; 1721 ierr = MatGetBlockSize(pc->pmat,&i);CHKERRQ(ierr); 1722 if (i > 1) { 1723 pcbddc->n_ISForDofsLocal = i; 1724 ierr = PetscMalloc1(pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal);CHKERRQ(ierr); 1725 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1726 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n/pcbddc->n_ISForDofsLocal,i,pcbddc->n_ISForDofsLocal,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1727 } 1728 } 1729 } 1730 } 1731 } else { 1732 PetscInt i; 1733 for (i=0;i<pcbddc->n_ISForDofsLocal;i++) { 1734 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->ISForDofsLocal[i]);CHKERRQ(ierr); 1735 } 1736 } 1737 } 1738 1739 boundary: 1740 if (!pcbddc->DirichletBoundariesLocal && pcbddc->DirichletBoundaries) { 1741 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->DirichletBoundaries,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1742 } else if (pcbddc->DirichletBoundariesLocal) { 1743 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LAND,&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 1744 } 1745 if (!pcbddc->NeumannBoundariesLocal && pcbddc->NeumannBoundaries) { 1746 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->NeumannBoundaries,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1747 } else if (pcbddc->NeumannBoundariesLocal) { 1748 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 1749 } 1750 if (!pcbddc->user_primal_vertices_local && pcbddc->user_primal_vertices) { 1751 ierr = PCBDDCGlobalToLocal(matis->rctx,global,local,pcbddc->user_primal_vertices,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 1752 } 1753 ierr = VecDestroy(&global);CHKERRQ(ierr); 1754 ierr = VecDestroy(&local);CHKERRQ(ierr); 1755 /* detect local disconnected subdomains if requested (use matis->A) */ 1756 if (pcbddc->detect_disconnected) { 1757 IS primalv = NULL; 1758 PetscInt i; 1759 1760 for (i=0;i<pcbddc->n_local_subs;i++) { 1761 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 1762 } 1763 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 1764 ierr = PCBDDCDetectDisconnectedComponents(pc,&pcbddc->n_local_subs,&pcbddc->local_subs,&primalv);CHKERRQ(ierr); 1765 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,primalv);CHKERRQ(ierr); 1766 ierr = ISDestroy(&primalv);CHKERRQ(ierr); 1767 } 1768 /* early stage corner detection */ 1769 { 1770 DM dm; 1771 1772 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 1773 if (dm) { 1774 PetscBool isda; 1775 1776 ierr = PetscObjectTypeCompare((PetscObject)dm,DMDA,&isda);CHKERRQ(ierr); 1777 if (isda) { 1778 ISLocalToGlobalMapping l2l; 1779 IS corners; 1780 Mat lA; 1781 1782 ierr = DMDAGetSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1783 ierr = MatISGetLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1784 ierr = MatGetLocalToGlobalMapping(lA,&l2l,NULL);CHKERRQ(ierr); 1785 ierr = MatISRestoreLocalMat(pc->pmat,&lA);CHKERRQ(ierr); 1786 if (l2l) { 1787 const PetscInt *idx; 1788 PetscInt bs,*idxout,n; 1789 1790 ierr = ISLocalToGlobalMappingGetBlockSize(l2l,&bs);CHKERRQ(ierr); 1791 ierr = ISGetLocalSize(corners,&n);CHKERRQ(ierr); 1792 ierr = ISGetIndices(corners,&idx);CHKERRQ(ierr); 1793 ierr = PetscMalloc1(n,&idxout);CHKERRQ(ierr); 1794 ierr = ISLocalToGlobalMappingApplyBlock(l2l,n,idx,idxout);CHKERRQ(ierr); 1795 ierr = ISRestoreIndices(corners,&idx);CHKERRQ(ierr); 1796 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1797 ierr = ISCreateBlock(PetscObjectComm((PetscObject)pc),bs,n,idxout,PETSC_OWN_POINTER,&corners);CHKERRQ(ierr); 1798 ierr = PCBDDCAddPrimalVerticesLocalIS(pc,corners);CHKERRQ(ierr); 1799 ierr = ISDestroy(&corners);CHKERRQ(ierr); 1800 } else { /* not from DMDA */ 1801 ierr = DMDARestoreSubdomainCornersIS(dm,&corners);CHKERRQ(ierr); 1802 } 1803 } 1804 } 1805 } 1806 PetscFunctionReturn(0); 1807 } 1808 1809 PetscErrorCode PCBDDCConsistencyCheckIS(PC pc, MPI_Op mop, IS *is) 1810 { 1811 Mat_IS *matis = (Mat_IS*)(pc->pmat->data); 1812 PetscErrorCode ierr; 1813 IS nis; 1814 const PetscInt *idxs; 1815 PetscInt i,nd,n = matis->A->rmap->n,*nidxs,nnd; 1816 PetscBool *ld; 1817 1818 PetscFunctionBegin; 1819 if (mop != MPI_LAND && mop != MPI_LOR) SETERRQ(PetscObjectComm((PetscObject)(pc)),PETSC_ERR_SUP,"Supported are MPI_LAND and MPI_LOR"); 1820 ierr = MatISSetUpSF(pc->pmat);CHKERRQ(ierr); 1821 if (mop == MPI_LAND) { 1822 /* init rootdata with true */ 1823 ld = (PetscBool*) matis->sf_rootdata; 1824 for (i=0;i<pc->pmat->rmap->n;i++) ld[i] = PETSC_TRUE; 1825 } else { 1826 ierr = PetscMemzero(matis->sf_rootdata,pc->pmat->rmap->n*sizeof(PetscBool));CHKERRQ(ierr); 1827 } 1828 ierr = PetscMemzero(matis->sf_leafdata,n*sizeof(PetscBool));CHKERRQ(ierr); 1829 ierr = ISGetLocalSize(*is,&nd);CHKERRQ(ierr); 1830 ierr = ISGetIndices(*is,&idxs);CHKERRQ(ierr); 1831 ld = (PetscBool*) matis->sf_leafdata; 1832 for (i=0;i<nd;i++) 1833 if (-1 < idxs[i] && idxs[i] < n) 1834 ld[idxs[i]] = PETSC_TRUE; 1835 ierr = ISRestoreIndices(*is,&idxs);CHKERRQ(ierr); 1836 ierr = PetscSFReduceBegin(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1837 ierr = PetscSFReduceEnd(matis->sf,MPIU_BOOL,matis->sf_leafdata,matis->sf_rootdata,mop);CHKERRQ(ierr); 1838 ierr = PetscSFBcastBegin(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1839 ierr = PetscSFBcastEnd(matis->sf,MPIU_BOOL,matis->sf_rootdata,matis->sf_leafdata);CHKERRQ(ierr); 1840 if (mop == MPI_LAND) { 1841 ierr = PetscMalloc1(nd,&nidxs);CHKERRQ(ierr); 1842 } else { 1843 ierr = PetscMalloc1(n,&nidxs);CHKERRQ(ierr); 1844 } 1845 for (i=0,nnd=0;i<n;i++) 1846 if (ld[i]) 1847 nidxs[nnd++] = i; 1848 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(*is)),nnd,nidxs,PETSC_OWN_POINTER,&nis);CHKERRQ(ierr); 1849 ierr = ISDestroy(is);CHKERRQ(ierr); 1850 *is = nis; 1851 PetscFunctionReturn(0); 1852 } 1853 1854 PetscErrorCode PCBDDCBenignRemoveInterior(PC pc,Vec r,Vec z) 1855 { 1856 PC_IS *pcis = (PC_IS*)(pc->data); 1857 PC_BDDC *pcbddc = (PC_BDDC*)(pc->data); 1858 PetscErrorCode ierr; 1859 1860 PetscFunctionBegin; 1861 if (!pcbddc->benign_have_null) { 1862 PetscFunctionReturn(0); 1863 } 1864 if (pcbddc->ChangeOfBasisMatrix) { 1865 Vec swap; 1866 1867 ierr = MatMultTranspose(pcbddc->ChangeOfBasisMatrix,r,pcbddc->work_change);CHKERRQ(ierr); 1868 swap = pcbddc->work_change; 1869 pcbddc->work_change = r; 1870 r = swap; 1871 } 1872 ierr = VecScatterBegin(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1873 ierr = VecScatterEnd(pcis->global_to_D,r,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 1874 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 1875 ierr = VecSet(z,0.);CHKERRQ(ierr); 1876 ierr = VecScatterBegin(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1877 ierr = VecScatterEnd(pcis->global_to_D,pcis->vec2_D,z,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 1878 if (pcbddc->ChangeOfBasisMatrix) { 1879 pcbddc->work_change = r; 1880 ierr = VecCopy(z,pcbddc->work_change);CHKERRQ(ierr); 1881 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcbddc->work_change,z);CHKERRQ(ierr); 1882 } 1883 PetscFunctionReturn(0); 1884 } 1885 1886 PetscErrorCode PCBDDCBenignMatMult_Private_Private(Mat A, Vec x, Vec y, PetscBool transpose) 1887 { 1888 PCBDDCBenignMatMult_ctx ctx; 1889 PetscErrorCode ierr; 1890 PetscBool apply_right,apply_left,reset_x; 1891 1892 PetscFunctionBegin; 1893 ierr = MatShellGetContext(A,&ctx);CHKERRQ(ierr); 1894 if (transpose) { 1895 apply_right = ctx->apply_left; 1896 apply_left = ctx->apply_right; 1897 } else { 1898 apply_right = ctx->apply_right; 1899 apply_left = ctx->apply_left; 1900 } 1901 reset_x = PETSC_FALSE; 1902 if (apply_right) { 1903 const PetscScalar *ax; 1904 PetscInt nl,i; 1905 1906 ierr = VecGetLocalSize(x,&nl);CHKERRQ(ierr); 1907 ierr = VecGetArrayRead(x,&ax);CHKERRQ(ierr); 1908 ierr = PetscMemcpy(ctx->work,ax,nl*sizeof(PetscScalar));CHKERRQ(ierr); 1909 ierr = VecRestoreArrayRead(x,&ax);CHKERRQ(ierr); 1910 for (i=0;i<ctx->benign_n;i++) { 1911 PetscScalar sum,val; 1912 const PetscInt *idxs; 1913 PetscInt nz,j; 1914 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1915 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1916 sum = 0.; 1917 if (ctx->apply_p0) { 1918 val = ctx->work[idxs[nz-1]]; 1919 for (j=0;j<nz-1;j++) { 1920 sum += ctx->work[idxs[j]]; 1921 ctx->work[idxs[j]] += val; 1922 } 1923 } else { 1924 for (j=0;j<nz-1;j++) { 1925 sum += ctx->work[idxs[j]]; 1926 } 1927 } 1928 ctx->work[idxs[nz-1]] -= sum; 1929 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1930 } 1931 ierr = VecPlaceArray(x,ctx->work);CHKERRQ(ierr); 1932 reset_x = PETSC_TRUE; 1933 } 1934 if (transpose) { 1935 ierr = MatMultTranspose(ctx->A,x,y);CHKERRQ(ierr); 1936 } else { 1937 ierr = MatMult(ctx->A,x,y);CHKERRQ(ierr); 1938 } 1939 if (reset_x) { 1940 ierr = VecResetArray(x);CHKERRQ(ierr); 1941 } 1942 if (apply_left) { 1943 PetscScalar *ay; 1944 PetscInt i; 1945 1946 ierr = VecGetArray(y,&ay);CHKERRQ(ierr); 1947 for (i=0;i<ctx->benign_n;i++) { 1948 PetscScalar sum,val; 1949 const PetscInt *idxs; 1950 PetscInt nz,j; 1951 ierr = ISGetLocalSize(ctx->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 1952 ierr = ISGetIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1953 val = -ay[idxs[nz-1]]; 1954 if (ctx->apply_p0) { 1955 sum = 0.; 1956 for (j=0;j<nz-1;j++) { 1957 sum += ay[idxs[j]]; 1958 ay[idxs[j]] += val; 1959 } 1960 ay[idxs[nz-1]] += sum; 1961 } else { 1962 for (j=0;j<nz-1;j++) { 1963 ay[idxs[j]] += val; 1964 } 1965 ay[idxs[nz-1]] = 0.; 1966 } 1967 ierr = ISRestoreIndices(ctx->benign_zerodiag_subs[i],&idxs);CHKERRQ(ierr); 1968 } 1969 ierr = VecRestoreArray(y,&ay);CHKERRQ(ierr); 1970 } 1971 PetscFunctionReturn(0); 1972 } 1973 1974 PetscErrorCode PCBDDCBenignMatMultTranspose_Private(Mat A, Vec x, Vec y) 1975 { 1976 PetscErrorCode ierr; 1977 1978 PetscFunctionBegin; 1979 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_TRUE);CHKERRQ(ierr); 1980 PetscFunctionReturn(0); 1981 } 1982 1983 PetscErrorCode PCBDDCBenignMatMult_Private(Mat A, Vec x, Vec y) 1984 { 1985 PetscErrorCode ierr; 1986 1987 PetscFunctionBegin; 1988 ierr = PCBDDCBenignMatMult_Private_Private(A,x,y,PETSC_FALSE);CHKERRQ(ierr); 1989 PetscFunctionReturn(0); 1990 } 1991 1992 PetscErrorCode PCBDDCBenignShellMat(PC pc, PetscBool restore) 1993 { 1994 PC_IS *pcis = (PC_IS*)pc->data; 1995 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 1996 PCBDDCBenignMatMult_ctx ctx; 1997 PetscErrorCode ierr; 1998 1999 PetscFunctionBegin; 2000 if (!restore) { 2001 Mat A_IB,A_BI; 2002 PetscScalar *work; 2003 PCBDDCReuseSolvers reuse = pcbddc->sub_schurs ? pcbddc->sub_schurs->reuse_solver : NULL; 2004 2005 if (pcbddc->benign_original_mat) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Benign original mat has not been restored"); 2006 if (!pcbddc->benign_change || !pcbddc->benign_n || pcbddc->benign_change_explicit) PetscFunctionReturn(0); 2007 ierr = PetscMalloc1(pcis->n,&work);CHKERRQ(ierr); 2008 ierr = MatCreate(PETSC_COMM_SELF,&A_IB);CHKERRQ(ierr); 2009 ierr = MatSetSizes(A_IB,pcis->n-pcis->n_B,pcis->n_B,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2010 ierr = MatSetType(A_IB,MATSHELL);CHKERRQ(ierr); 2011 ierr = MatShellSetOperation(A_IB,MATOP_MULT,(void (*)(void))PCBDDCBenignMatMult_Private);CHKERRQ(ierr); 2012 ierr = MatShellSetOperation(A_IB,MATOP_MULT_TRANSPOSE,(void (*)(void))PCBDDCBenignMatMultTranspose_Private);CHKERRQ(ierr); 2013 ierr = PetscNew(&ctx);CHKERRQ(ierr); 2014 ierr = MatShellSetContext(A_IB,ctx);CHKERRQ(ierr); 2015 ctx->apply_left = PETSC_TRUE; 2016 ctx->apply_right = PETSC_FALSE; 2017 ctx->apply_p0 = PETSC_FALSE; 2018 ctx->benign_n = pcbddc->benign_n; 2019 if (reuse) { 2020 ctx->benign_zerodiag_subs = reuse->benign_zerodiag_subs; 2021 ctx->free = PETSC_FALSE; 2022 } else { /* TODO: could be optimized for successive solves */ 2023 ISLocalToGlobalMapping N_to_D; 2024 PetscInt i; 2025 2026 ierr = ISLocalToGlobalMappingCreateIS(pcis->is_I_local,&N_to_D);CHKERRQ(ierr); 2027 ierr = PetscMalloc1(pcbddc->benign_n,&ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2028 for (i=0;i<pcbddc->benign_n;i++) { 2029 ierr = ISGlobalToLocalMappingApplyIS(N_to_D,IS_GTOLM_DROP,pcbddc->benign_zerodiag_subs[i],&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2030 } 2031 ierr = ISLocalToGlobalMappingDestroy(&N_to_D);CHKERRQ(ierr); 2032 ctx->free = PETSC_TRUE; 2033 } 2034 ctx->A = pcis->A_IB; 2035 ctx->work = work; 2036 ierr = MatSetUp(A_IB);CHKERRQ(ierr); 2037 ierr = MatAssemblyBegin(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2038 ierr = MatAssemblyEnd(A_IB,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2039 pcis->A_IB = A_IB; 2040 2041 /* A_BI as A_IB^T */ 2042 ierr = MatCreateTranspose(A_IB,&A_BI);CHKERRQ(ierr); 2043 pcbddc->benign_original_mat = pcis->A_BI; 2044 pcis->A_BI = A_BI; 2045 } else { 2046 if (!pcbddc->benign_original_mat) { 2047 PetscFunctionReturn(0); 2048 } 2049 ierr = MatShellGetContext(pcis->A_IB,&ctx);CHKERRQ(ierr); 2050 ierr = MatDestroy(&pcis->A_IB);CHKERRQ(ierr); 2051 pcis->A_IB = ctx->A; 2052 ctx->A = NULL; 2053 ierr = MatDestroy(&pcis->A_BI);CHKERRQ(ierr); 2054 pcis->A_BI = pcbddc->benign_original_mat; 2055 pcbddc->benign_original_mat = NULL; 2056 if (ctx->free) { 2057 PetscInt i; 2058 for (i=0;i<ctx->benign_n;i++) { 2059 ierr = ISDestroy(&ctx->benign_zerodiag_subs[i]);CHKERRQ(ierr); 2060 } 2061 ierr = PetscFree(ctx->benign_zerodiag_subs);CHKERRQ(ierr); 2062 } 2063 ierr = PetscFree(ctx->work);CHKERRQ(ierr); 2064 ierr = PetscFree(ctx);CHKERRQ(ierr); 2065 } 2066 PetscFunctionReturn(0); 2067 } 2068 2069 /* used just in bddc debug mode */ 2070 PetscErrorCode PCBDDCBenignProject(PC pc, IS is1, IS is2, Mat *B) 2071 { 2072 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 2073 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2074 Mat An; 2075 PetscErrorCode ierr; 2076 2077 PetscFunctionBegin; 2078 ierr = MatPtAP(matis->A,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&An);CHKERRQ(ierr); 2079 ierr = MatZeroRowsColumns(An,pcbddc->benign_n,pcbddc->benign_p0_lidx,1.0,NULL,NULL);CHKERRQ(ierr); 2080 if (is1) { 2081 ierr = MatCreateSubMatrix(An,is1,is2,MAT_INITIAL_MATRIX,B);CHKERRQ(ierr); 2082 ierr = MatDestroy(&An);CHKERRQ(ierr); 2083 } else { 2084 *B = An; 2085 } 2086 PetscFunctionReturn(0); 2087 } 2088 2089 /* TODO: add reuse flag */ 2090 PetscErrorCode MatSeqAIJCompress(Mat A, Mat *B) 2091 { 2092 Mat Bt; 2093 PetscScalar *a,*bdata; 2094 const PetscInt *ii,*ij; 2095 PetscInt m,n,i,nnz,*bii,*bij; 2096 PetscBool flg_row; 2097 PetscErrorCode ierr; 2098 2099 PetscFunctionBegin; 2100 ierr = MatGetSize(A,&n,&m);CHKERRQ(ierr); 2101 ierr = MatGetRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2102 ierr = MatSeqAIJGetArray(A,&a);CHKERRQ(ierr); 2103 nnz = n; 2104 for (i=0;i<ii[n];i++) { 2105 if (PetscLikely(PetscAbsScalar(a[i]) > PETSC_SMALL)) nnz++; 2106 } 2107 ierr = PetscMalloc1(n+1,&bii);CHKERRQ(ierr); 2108 ierr = PetscMalloc1(nnz,&bij);CHKERRQ(ierr); 2109 ierr = PetscMalloc1(nnz,&bdata);CHKERRQ(ierr); 2110 nnz = 0; 2111 bii[0] = 0; 2112 for (i=0;i<n;i++) { 2113 PetscInt j; 2114 for (j=ii[i];j<ii[i+1];j++) { 2115 PetscScalar entry = a[j]; 2116 if (PetscLikely(PetscAbsScalar(entry) > PETSC_SMALL) || ij[j] == i) { 2117 bij[nnz] = ij[j]; 2118 bdata[nnz] = entry; 2119 nnz++; 2120 } 2121 } 2122 bii[i+1] = nnz; 2123 } 2124 ierr = MatSeqAIJRestoreArray(A,&a);CHKERRQ(ierr); 2125 ierr = MatCreateSeqAIJWithArrays(PetscObjectComm((PetscObject)A),n,m,bii,bij,bdata,&Bt);CHKERRQ(ierr); 2126 ierr = MatRestoreRowIJ(A,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&ij,&flg_row);CHKERRQ(ierr); 2127 { 2128 Mat_SeqAIJ *b = (Mat_SeqAIJ*)(Bt->data); 2129 b->free_a = PETSC_TRUE; 2130 b->free_ij = PETSC_TRUE; 2131 } 2132 *B = Bt; 2133 PetscFunctionReturn(0); 2134 } 2135 2136 PetscErrorCode PCBDDCDetectDisconnectedComponents(PC pc, PetscInt *ncc, IS* cc[], IS* primalv) 2137 { 2138 Mat B = NULL; 2139 DM dm; 2140 IS is_dummy,*cc_n; 2141 ISLocalToGlobalMapping l2gmap_dummy; 2142 PCBDDCGraph graph; 2143 PetscInt *xadj_filtered = NULL,*adjncy_filtered = NULL; 2144 PetscInt i,n; 2145 PetscInt *xadj,*adjncy; 2146 PetscBool isplex = PETSC_FALSE; 2147 PetscErrorCode ierr; 2148 2149 PetscFunctionBegin; 2150 if (ncc) *ncc = 0; 2151 if (cc) *cc = NULL; 2152 if (primalv) *primalv = NULL; 2153 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 2154 ierr = PCGetDM(pc,&dm);CHKERRQ(ierr); 2155 if (!dm) { 2156 ierr = MatGetDM(pc->pmat,&dm);CHKERRQ(ierr); 2157 } 2158 if (dm) { 2159 ierr = PetscObjectTypeCompare((PetscObject)dm,DMPLEX,&isplex);CHKERRQ(ierr); 2160 } 2161 if (isplex) { /* this code has been modified from plexpartition.c */ 2162 PetscInt p, pStart, pEnd, a, adjSize, idx, size, nroots; 2163 PetscInt *adj = NULL; 2164 IS cellNumbering; 2165 const PetscInt *cellNum; 2166 PetscBool useCone, useClosure; 2167 PetscSection section; 2168 PetscSegBuffer adjBuffer; 2169 PetscSF sfPoint; 2170 PetscErrorCode ierr; 2171 2172 PetscFunctionBegin; 2173 ierr = DMPlexGetHeightStratum(dm, 0, &pStart, &pEnd);CHKERRQ(ierr); 2174 ierr = DMGetPointSF(dm, &sfPoint);CHKERRQ(ierr); 2175 ierr = PetscSFGetGraph(sfPoint, &nroots, NULL, NULL, NULL);CHKERRQ(ierr); 2176 /* Build adjacency graph via a section/segbuffer */ 2177 ierr = PetscSectionCreate(PetscObjectComm((PetscObject) dm), §ion);CHKERRQ(ierr); 2178 ierr = PetscSectionSetChart(section, pStart, pEnd);CHKERRQ(ierr); 2179 ierr = PetscSegBufferCreate(sizeof(PetscInt),1000,&adjBuffer);CHKERRQ(ierr); 2180 /* Always use FVM adjacency to create partitioner graph */ 2181 ierr = DMPlexGetAdjacencyUseCone(dm, &useCone);CHKERRQ(ierr); 2182 ierr = DMPlexGetAdjacencyUseClosure(dm, &useClosure);CHKERRQ(ierr); 2183 ierr = DMPlexSetAdjacencyUseCone(dm, PETSC_TRUE);CHKERRQ(ierr); 2184 ierr = DMPlexSetAdjacencyUseClosure(dm, PETSC_FALSE);CHKERRQ(ierr); 2185 ierr = DMPlexGetCellNumbering(dm, &cellNumbering);CHKERRQ(ierr); 2186 ierr = ISGetIndices(cellNumbering, &cellNum);CHKERRQ(ierr); 2187 for (n = 0, p = pStart; p < pEnd; p++) { 2188 /* Skip non-owned cells in parallel (ParMetis expects no overlap) */ 2189 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2190 adjSize = PETSC_DETERMINE; 2191 ierr = DMPlexGetAdjacency(dm, p, &adjSize, &adj);CHKERRQ(ierr); 2192 for (a = 0; a < adjSize; ++a) { 2193 const PetscInt point = adj[a]; 2194 if (pStart <= point && point < pEnd) { 2195 PetscInt *PETSC_RESTRICT pBuf; 2196 ierr = PetscSectionAddDof(section, p, 1);CHKERRQ(ierr); 2197 ierr = PetscSegBufferGetInts(adjBuffer, 1, &pBuf);CHKERRQ(ierr); 2198 *pBuf = point; 2199 } 2200 } 2201 n++; 2202 } 2203 ierr = DMPlexSetAdjacencyUseCone(dm, useCone);CHKERRQ(ierr); 2204 ierr = DMPlexSetAdjacencyUseClosure(dm, useClosure);CHKERRQ(ierr); 2205 /* Derive CSR graph from section/segbuffer */ 2206 ierr = PetscSectionSetUp(section);CHKERRQ(ierr); 2207 ierr = PetscSectionGetStorageSize(section, &size);CHKERRQ(ierr); 2208 ierr = PetscMalloc1(n+1, &xadj);CHKERRQ(ierr); 2209 for (idx = 0, p = pStart; p < pEnd; p++) { 2210 if (nroots > 0) {if (cellNum[p] < 0) continue;} 2211 ierr = PetscSectionGetOffset(section, p, &(xadj[idx++]));CHKERRQ(ierr); 2212 } 2213 xadj[n] = size; 2214 ierr = PetscSegBufferExtractAlloc(adjBuffer, &adjncy);CHKERRQ(ierr); 2215 /* Clean up */ 2216 ierr = PetscSegBufferDestroy(&adjBuffer);CHKERRQ(ierr); 2217 ierr = PetscSectionDestroy(§ion);CHKERRQ(ierr); 2218 ierr = PetscFree(adj);CHKERRQ(ierr); 2219 graph->xadj = xadj; 2220 graph->adjncy = adjncy; 2221 } else { 2222 Mat A; 2223 PetscBool filter = PETSC_FALSE, isseqaij, flg_row; 2224 2225 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2226 if (!A->rmap->N || !A->cmap->N) { 2227 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2228 PetscFunctionReturn(0); 2229 } 2230 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 2231 if (!isseqaij && filter) { 2232 PetscBool isseqdense; 2233 2234 ierr = PetscObjectTypeCompare((PetscObject)A,MATSEQDENSE,&isseqdense);CHKERRQ(ierr); 2235 if (!isseqdense) { 2236 ierr = MatConvert(A,MATSEQAIJ,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 2237 } else { /* TODO: rectangular case and LDA */ 2238 PetscScalar *array; 2239 PetscReal chop=1.e-6; 2240 2241 ierr = MatDuplicate(A,MAT_COPY_VALUES,&B);CHKERRQ(ierr); 2242 ierr = MatDenseGetArray(B,&array);CHKERRQ(ierr); 2243 ierr = MatGetSize(B,&n,NULL);CHKERRQ(ierr); 2244 for (i=0;i<n;i++) { 2245 PetscInt j; 2246 for (j=i+1;j<n;j++) { 2247 PetscReal thresh = chop*(PetscAbsScalar(array[i*(n+1)])+PetscAbsScalar(array[j*(n+1)])); 2248 if (PetscAbsScalar(array[i*n+j]) < thresh) array[i*n+j] = 0.; 2249 if (PetscAbsScalar(array[j*n+i]) < thresh) array[j*n+i] = 0.; 2250 } 2251 } 2252 ierr = MatDenseRestoreArray(B,&array);CHKERRQ(ierr); 2253 ierr = MatConvert(B,MATSEQAIJ,MAT_INPLACE_MATRIX,&B);CHKERRQ(ierr); 2254 } 2255 } else { 2256 ierr = PetscObjectReference((PetscObject)A);CHKERRQ(ierr); 2257 B = A; 2258 } 2259 ierr = MatGetRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2260 2261 /* if filter is true, then removes entries lower than PETSC_SMALL in magnitude */ 2262 if (filter) { 2263 PetscScalar *data; 2264 PetscInt j,cum; 2265 2266 ierr = PetscCalloc2(n+1,&xadj_filtered,xadj[n],&adjncy_filtered);CHKERRQ(ierr); 2267 ierr = MatSeqAIJGetArray(B,&data);CHKERRQ(ierr); 2268 cum = 0; 2269 for (i=0;i<n;i++) { 2270 PetscInt t; 2271 2272 for (j=xadj[i];j<xadj[i+1];j++) { 2273 if (PetscUnlikely(PetscAbsScalar(data[j]) < PETSC_SMALL)) { 2274 continue; 2275 } 2276 adjncy_filtered[cum+xadj_filtered[i]++] = adjncy[j]; 2277 } 2278 t = xadj_filtered[i]; 2279 xadj_filtered[i] = cum; 2280 cum += t; 2281 } 2282 ierr = MatSeqAIJRestoreArray(B,&data);CHKERRQ(ierr); 2283 graph->xadj = xadj_filtered; 2284 graph->adjncy = adjncy_filtered; 2285 } else { 2286 graph->xadj = xadj; 2287 graph->adjncy = adjncy; 2288 } 2289 } 2290 /* compute local connected components using PCBDDCGraph */ 2291 ierr = ISCreateStride(PETSC_COMM_SELF,n,0,1,&is_dummy);CHKERRQ(ierr); 2292 ierr = ISLocalToGlobalMappingCreateIS(is_dummy,&l2gmap_dummy);CHKERRQ(ierr); 2293 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 2294 ierr = PCBDDCGraphInit(graph,l2gmap_dummy,n,PETSC_MAX_INT);CHKERRQ(ierr); 2295 ierr = ISLocalToGlobalMappingDestroy(&l2gmap_dummy);CHKERRQ(ierr); 2296 ierr = PCBDDCGraphSetUp(graph,1,NULL,NULL,0,NULL,NULL);CHKERRQ(ierr); 2297 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 2298 2299 /* partial clean up */ 2300 ierr = PetscFree2(xadj_filtered,adjncy_filtered);CHKERRQ(ierr); 2301 if (B) { 2302 PetscBool flg_row; 2303 ierr = MatRestoreRowIJ(B,0,PETSC_TRUE,PETSC_FALSE,&n,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 2304 ierr = MatDestroy(&B);CHKERRQ(ierr); 2305 } 2306 if (isplex) { 2307 ierr = PetscFree(xadj);CHKERRQ(ierr); 2308 ierr = PetscFree(adjncy);CHKERRQ(ierr); 2309 } 2310 2311 /* get back data */ 2312 if (isplex) { 2313 if (ncc) *ncc = graph->ncc; 2314 if (cc || primalv) { 2315 Mat A; 2316 PetscBT btv,btvt; 2317 PetscSection subSection; 2318 PetscInt *ids,cum,cump,*cids,*pids; 2319 2320 ierr = DMPlexGetSubdomainSection(dm,&subSection);CHKERRQ(ierr); 2321 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2322 ierr = PetscMalloc3(A->rmap->n,&ids,graph->ncc+1,&cids,A->rmap->n,&pids);CHKERRQ(ierr); 2323 ierr = PetscBTCreate(A->rmap->n,&btv);CHKERRQ(ierr); 2324 ierr = PetscBTCreate(A->rmap->n,&btvt);CHKERRQ(ierr); 2325 2326 cids[0] = 0; 2327 for (i = 0, cump = 0, cum = 0; i < graph->ncc; i++) { 2328 PetscInt j; 2329 2330 ierr = PetscBTMemzero(A->rmap->n,btvt);CHKERRQ(ierr); 2331 for (j = graph->cptr[i]; j < graph->cptr[i+1]; j++) { 2332 PetscInt k, size, *closure = NULL, cell = graph->queue[j]; 2333 2334 ierr = DMPlexGetTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2335 for (k = 0; k < 2*size; k += 2) { 2336 PetscInt s, p = closure[k], off, dof, cdof; 2337 2338 ierr = PetscSectionGetConstraintDof(subSection, p, &cdof);CHKERRQ(ierr); 2339 ierr = PetscSectionGetOffset(subSection,p,&off);CHKERRQ(ierr); 2340 ierr = PetscSectionGetDof(subSection,p,&dof);CHKERRQ(ierr); 2341 for (s = 0; s < dof-cdof; s++) { 2342 if (PetscBTLookupSet(btvt,off+s)) continue; 2343 if (!PetscBTLookup(btv,off+s)) { 2344 ids[cum++] = off+s; 2345 } else { /* cross-vertex */ 2346 pids[cump++] = off+s; 2347 } 2348 } 2349 } 2350 ierr = DMPlexRestoreTransitiveClosure(dm,cell,PETSC_TRUE,&size,&closure);CHKERRQ(ierr); 2351 } 2352 cids[i+1] = cum; 2353 /* mark dofs as already assigned */ 2354 for (j = cids[i]; j < cids[i+1]; j++) { 2355 ierr = PetscBTSet(btv,ids[j]);CHKERRQ(ierr); 2356 } 2357 } 2358 if (cc) { 2359 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2360 for (i = 0; i < graph->ncc; i++) { 2361 ierr = ISCreateGeneral(PETSC_COMM_SELF,cids[i+1]-cids[i],ids+cids[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2362 } 2363 *cc = cc_n; 2364 } 2365 if (primalv) { 2366 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),cump,pids,PETSC_COPY_VALUES,primalv);CHKERRQ(ierr); 2367 } 2368 ierr = PetscFree3(ids,cids,pids);CHKERRQ(ierr); 2369 ierr = PetscBTDestroy(&btv);CHKERRQ(ierr); 2370 ierr = PetscBTDestroy(&btvt);CHKERRQ(ierr); 2371 } 2372 } else { 2373 if (ncc) *ncc = graph->ncc; 2374 if (cc) { 2375 ierr = PetscMalloc1(graph->ncc,&cc_n);CHKERRQ(ierr); 2376 for (i=0;i<graph->ncc;i++) { 2377 ierr = ISCreateGeneral(PETSC_COMM_SELF,graph->cptr[i+1]-graph->cptr[i],graph->queue+graph->cptr[i],PETSC_COPY_VALUES,&cc_n[i]);CHKERRQ(ierr); 2378 } 2379 *cc = cc_n; 2380 } 2381 } 2382 /* clean up graph */ 2383 graph->xadj = 0; 2384 graph->adjncy = 0; 2385 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 2386 PetscFunctionReturn(0); 2387 } 2388 2389 PetscErrorCode PCBDDCBenignCheck(PC pc, IS zerodiag) 2390 { 2391 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2392 PC_IS* pcis = (PC_IS*)(pc->data); 2393 IS dirIS = NULL; 2394 PetscInt i; 2395 PetscErrorCode ierr; 2396 2397 PetscFunctionBegin; 2398 ierr = PCBDDCGraphGetDirichletDofs(pcbddc->mat_graph,&dirIS);CHKERRQ(ierr); 2399 if (zerodiag) { 2400 Mat A; 2401 Vec vec3_N; 2402 PetscScalar *vals; 2403 const PetscInt *idxs; 2404 PetscInt nz,*count; 2405 2406 /* p0 */ 2407 ierr = VecSet(pcis->vec1_N,0.);CHKERRQ(ierr); 2408 ierr = PetscMalloc1(pcis->n,&vals);CHKERRQ(ierr); 2409 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2410 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2411 for (i=0;i<nz;i++) vals[i] = 1.; 2412 ierr = VecSetValues(pcis->vec1_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2413 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 2414 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 2415 /* v_I */ 2416 ierr = VecSetRandom(pcis->vec2_N,NULL);CHKERRQ(ierr); 2417 for (i=0;i<nz;i++) vals[i] = 0.; 2418 ierr = VecSetValues(pcis->vec2_N,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2419 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2420 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2421 for (i=0;i<pcis->n_B;i++) vals[i] = 0.; 2422 ierr = VecSetValues(pcis->vec2_N,pcis->n_B,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2423 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2424 if (dirIS) { 2425 PetscInt n; 2426 2427 ierr = ISGetLocalSize(dirIS,&n);CHKERRQ(ierr); 2428 ierr = ISGetIndices(dirIS,&idxs);CHKERRQ(ierr); 2429 for (i=0;i<n;i++) vals[i] = 0.; 2430 ierr = VecSetValues(pcis->vec2_N,n,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2431 ierr = ISRestoreIndices(dirIS,&idxs);CHKERRQ(ierr); 2432 } 2433 ierr = VecAssemblyBegin(pcis->vec2_N);CHKERRQ(ierr); 2434 ierr = VecAssemblyEnd(pcis->vec2_N);CHKERRQ(ierr); 2435 ierr = VecDuplicate(pcis->vec1_N,&vec3_N);CHKERRQ(ierr); 2436 ierr = VecSet(vec3_N,0.);CHKERRQ(ierr); 2437 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2438 ierr = MatMult(A,pcis->vec1_N,vec3_N);CHKERRQ(ierr); 2439 ierr = VecDot(vec3_N,pcis->vec2_N,&vals[0]);CHKERRQ(ierr); 2440 if (PetscAbsScalar(vals[0]) > 1.e-1) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! b(v_I,p_0) = %1.6e (should be numerically 0.)",PetscAbsScalar(vals[0])); 2441 ierr = PetscFree(vals);CHKERRQ(ierr); 2442 ierr = VecDestroy(&vec3_N);CHKERRQ(ierr); 2443 2444 /* there should not be any pressure dofs lying on the interface */ 2445 ierr = PetscCalloc1(pcis->n,&count);CHKERRQ(ierr); 2446 ierr = ISGetIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2447 for (i=0;i<pcis->n_B;i++) count[idxs[i]]++; 2448 ierr = ISRestoreIndices(pcis->is_B_local,&idxs);CHKERRQ(ierr); 2449 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2450 for (i=0;i<nz;i++) if (count[idxs[i]]) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_SUP,"Benign trick can not be applied! pressure dof %d is an interface dof",idxs[i]); 2451 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2452 ierr = PetscFree(count);CHKERRQ(ierr); 2453 } 2454 ierr = ISDestroy(&dirIS);CHKERRQ(ierr); 2455 2456 /* check PCBDDCBenignGetOrSetP0 */ 2457 ierr = VecSetRandom(pcis->vec1_global,NULL);CHKERRQ(ierr); 2458 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = -PetscGlobalRank-i; 2459 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_FALSE);CHKERRQ(ierr); 2460 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = 1; 2461 ierr = PCBDDCBenignGetOrSetP0(pc,pcis->vec1_global,PETSC_TRUE);CHKERRQ(ierr); 2462 for (i=0;i<pcbddc->benign_n;i++) { 2463 PetscInt val = PetscRealPart(pcbddc->benign_p0[i]); 2464 if (val != -PetscGlobalRank-i) SETERRQ3(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error testing PCBDDCBenignGetOrSetP0! Found %g at %d instead of %g\n",PetscRealPart(pcbddc->benign_p0[i]),i,-PetscGlobalRank-i); 2465 } 2466 PetscFunctionReturn(0); 2467 } 2468 2469 PetscErrorCode PCBDDCBenignDetectSaddlePoint(PC pc, IS *zerodiaglocal) 2470 { 2471 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2472 IS pressures,zerodiag,zerodiag_save,*zerodiag_subs; 2473 PetscInt nz,n; 2474 PetscInt *interior_dofs,n_interior_dofs,nneu; 2475 PetscBool sorted,have_null,has_null_pressures,recompute_zerodiag,checkb; 2476 PetscErrorCode ierr; 2477 2478 PetscFunctionBegin; 2479 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 2480 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 2481 for (n=0;n<pcbddc->benign_n;n++) { 2482 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[n]);CHKERRQ(ierr); 2483 } 2484 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 2485 pcbddc->benign_n = 0; 2486 2487 /* if a local info on dofs is present, uses the last field for "pressures" (or fid by command line) 2488 otherwise, it uses only zerodiagonal dofs (ok if the pressure block is all zero; it could fail if it is not) 2489 Checks if all the pressure dofs in each subdomain have a zero diagonal 2490 If not, a change of basis on pressures is not needed 2491 since the local Schur complements are already SPD 2492 */ 2493 has_null_pressures = PETSC_TRUE; 2494 have_null = PETSC_TRUE; 2495 if (pcbddc->n_ISForDofsLocal) { 2496 IS iP = NULL; 2497 PetscInt npl,*idxs,p = pcbddc->n_ISForDofsLocal-1; 2498 2499 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)pc),((PetscObject)pc)->prefix,"BDDC benign options","PC");CHKERRQ(ierr); 2500 ierr = PetscOptionsInt("-pc_bddc_pressure_field","Field id for pressures",NULL,p,&p,NULL);CHKERRQ(ierr); 2501 ierr = PetscOptionsEnd();CHKERRQ(ierr); 2502 if (p < 0 || p > pcbddc->n_ISForDofsLocal-1) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Invalid field id for pressures %D",p); 2503 /* Dofs splitting for BDDC cannot have PETSC_COMM_SELF, so create a sequential IS */ 2504 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[p],&npl);CHKERRQ(ierr); 2505 ierr = ISGetIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2506 ierr = ISCreateGeneral(PETSC_COMM_SELF,npl,idxs,PETSC_COPY_VALUES,&pressures);CHKERRQ(ierr); 2507 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[p],(const PetscInt**)&idxs);CHKERRQ(ierr); 2508 /* remove zeroed out pressures if we are setting up a BDDC solver for a saddle-point FETI-DP */ 2509 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lP",(PetscObject*)&iP);CHKERRQ(ierr); 2510 if (iP) { 2511 IS newpressures; 2512 2513 ierr = ISDifference(pressures,iP,&newpressures);CHKERRQ(ierr); 2514 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2515 pressures = newpressures; 2516 } 2517 ierr = ISSorted(pressures,&sorted);CHKERRQ(ierr); 2518 if (!sorted) { 2519 ierr = ISSort(pressures);CHKERRQ(ierr); 2520 } 2521 } else { 2522 pressures = NULL; 2523 } 2524 /* pcis has not been setup yet, so get the local size from the subdomain matrix */ 2525 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2526 if (!n) pcbddc->benign_change_explicit = PETSC_TRUE; 2527 ierr = MatFindZeroDiagonals(pcbddc->local_mat,&zerodiag);CHKERRQ(ierr); 2528 ierr = ISSorted(zerodiag,&sorted);CHKERRQ(ierr); 2529 if (!sorted) { 2530 ierr = ISSort(zerodiag);CHKERRQ(ierr); 2531 } 2532 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2533 zerodiag_save = zerodiag; 2534 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2535 if (!nz) { 2536 if (n) have_null = PETSC_FALSE; 2537 has_null_pressures = PETSC_FALSE; 2538 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2539 } 2540 recompute_zerodiag = PETSC_FALSE; 2541 /* in case disconnected subdomains info is present, split the pressures accordingly (otherwise the benign trick could fail) */ 2542 zerodiag_subs = NULL; 2543 pcbddc->benign_n = 0; 2544 n_interior_dofs = 0; 2545 interior_dofs = NULL; 2546 nneu = 0; 2547 if (pcbddc->NeumannBoundariesLocal) { 2548 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&nneu);CHKERRQ(ierr); 2549 } 2550 checkb = (PetscBool)(!pcbddc->NeumannBoundariesLocal || pcbddc->current_level); 2551 if (checkb) { /* need to compute interior nodes */ 2552 PetscInt n,i,j; 2553 PetscInt n_neigh,*neigh,*n_shared,**shared; 2554 PetscInt *iwork; 2555 2556 ierr = ISLocalToGlobalMappingGetSize(pc->pmat->rmap->mapping,&n);CHKERRQ(ierr); 2557 ierr = ISLocalToGlobalMappingGetInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2558 ierr = PetscCalloc1(n,&iwork);CHKERRQ(ierr); 2559 ierr = PetscMalloc1(n,&interior_dofs);CHKERRQ(ierr); 2560 for (i=1;i<n_neigh;i++) 2561 for (j=0;j<n_shared[i];j++) 2562 iwork[shared[i][j]] += 1; 2563 for (i=0;i<n;i++) 2564 if (!iwork[i]) 2565 interior_dofs[n_interior_dofs++] = i; 2566 ierr = PetscFree(iwork);CHKERRQ(ierr); 2567 ierr = ISLocalToGlobalMappingRestoreInfo(pc->pmat->rmap->mapping,&n_neigh,&neigh,&n_shared,&shared);CHKERRQ(ierr); 2568 } 2569 if (has_null_pressures) { 2570 IS *subs; 2571 PetscInt nsubs,i,j,nl; 2572 const PetscInt *idxs; 2573 PetscScalar *array; 2574 Vec *work; 2575 Mat_IS* matis = (Mat_IS*)(pc->pmat->data); 2576 2577 subs = pcbddc->local_subs; 2578 nsubs = pcbddc->n_local_subs; 2579 /* these vectors are needed to check if the constant on pressures is in the kernel of the local operator B (i.e. B(v_I,p0) should be zero) */ 2580 if (checkb) { 2581 ierr = VecDuplicateVecs(matis->y,2,&work);CHKERRQ(ierr); 2582 ierr = ISGetLocalSize(zerodiag,&nl);CHKERRQ(ierr); 2583 ierr = ISGetIndices(zerodiag,&idxs);CHKERRQ(ierr); 2584 /* work[0] = 1_p */ 2585 ierr = VecSet(work[0],0.);CHKERRQ(ierr); 2586 ierr = VecGetArray(work[0],&array);CHKERRQ(ierr); 2587 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2588 ierr = VecRestoreArray(work[0],&array);CHKERRQ(ierr); 2589 /* work[0] = 1_v */ 2590 ierr = VecSet(work[1],1.);CHKERRQ(ierr); 2591 ierr = VecGetArray(work[1],&array);CHKERRQ(ierr); 2592 for (j=0;j<nl;j++) array[idxs[j]] = 0.; 2593 ierr = VecRestoreArray(work[1],&array);CHKERRQ(ierr); 2594 ierr = ISRestoreIndices(zerodiag,&idxs);CHKERRQ(ierr); 2595 } 2596 if (nsubs > 1) { 2597 ierr = PetscCalloc1(nsubs,&zerodiag_subs);CHKERRQ(ierr); 2598 for (i=0;i<nsubs;i++) { 2599 ISLocalToGlobalMapping l2g; 2600 IS t_zerodiag_subs; 2601 PetscInt nl; 2602 2603 ierr = ISLocalToGlobalMappingCreateIS(subs[i],&l2g);CHKERRQ(ierr); 2604 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,zerodiag,&t_zerodiag_subs);CHKERRQ(ierr); 2605 ierr = ISGetLocalSize(t_zerodiag_subs,&nl);CHKERRQ(ierr); 2606 if (nl) { 2607 PetscBool valid = PETSC_TRUE; 2608 2609 if (checkb) { 2610 ierr = VecSet(matis->x,0);CHKERRQ(ierr); 2611 ierr = ISGetLocalSize(subs[i],&nl);CHKERRQ(ierr); 2612 ierr = ISGetIndices(subs[i],&idxs);CHKERRQ(ierr); 2613 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2614 for (j=0;j<nl;j++) array[idxs[j]] = 1.; 2615 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2616 ierr = ISRestoreIndices(subs[i],&idxs);CHKERRQ(ierr); 2617 ierr = VecPointwiseMult(matis->x,work[0],matis->x);CHKERRQ(ierr); 2618 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2619 ierr = VecPointwiseMult(matis->y,work[1],matis->y);CHKERRQ(ierr); 2620 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2621 for (j=0;j<n_interior_dofs;j++) { 2622 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2623 valid = PETSC_FALSE; 2624 break; 2625 } 2626 } 2627 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2628 } 2629 if (valid && nneu) { 2630 const PetscInt *idxs; 2631 PetscInt nzb; 2632 2633 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2634 ierr = ISGlobalToLocalMappingApply(l2g,IS_GTOLM_DROP,nneu,idxs,&nzb,NULL);CHKERRQ(ierr); 2635 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 2636 if (nzb) valid = PETSC_FALSE; 2637 } 2638 if (valid && pressures) { 2639 IS t_pressure_subs; 2640 ierr = ISGlobalToLocalMappingApplyIS(l2g,IS_GTOLM_DROP,pressures,&t_pressure_subs);CHKERRQ(ierr); 2641 ierr = ISEqual(t_pressure_subs,t_zerodiag_subs,&valid);CHKERRQ(ierr); 2642 ierr = ISDestroy(&t_pressure_subs);CHKERRQ(ierr); 2643 } 2644 if (valid) { 2645 ierr = ISLocalToGlobalMappingApplyIS(l2g,t_zerodiag_subs,&zerodiag_subs[pcbddc->benign_n]);CHKERRQ(ierr); 2646 pcbddc->benign_n++; 2647 } else { 2648 recompute_zerodiag = PETSC_TRUE; 2649 } 2650 } 2651 ierr = ISDestroy(&t_zerodiag_subs);CHKERRQ(ierr); 2652 ierr = ISLocalToGlobalMappingDestroy(&l2g);CHKERRQ(ierr); 2653 } 2654 } else { /* there's just one subdomain (or zero if they have not been detected */ 2655 PetscBool valid = PETSC_TRUE; 2656 2657 if (nneu) valid = PETSC_FALSE; 2658 if (valid && pressures) { 2659 ierr = ISEqual(pressures,zerodiag,&valid);CHKERRQ(ierr); 2660 } 2661 if (valid && checkb) { 2662 ierr = MatMult(matis->A,work[0],matis->x);CHKERRQ(ierr); 2663 ierr = VecPointwiseMult(matis->x,work[1],matis->x);CHKERRQ(ierr); 2664 ierr = VecGetArray(matis->x,&array);CHKERRQ(ierr); 2665 for (j=0;j<n_interior_dofs;j++) { 2666 if (PetscAbsScalar(array[interior_dofs[j]]) > PETSC_SMALL) { 2667 valid = PETSC_FALSE; 2668 break; 2669 } 2670 } 2671 ierr = VecRestoreArray(matis->x,&array);CHKERRQ(ierr); 2672 } 2673 if (valid) { 2674 pcbddc->benign_n = 1; 2675 ierr = PetscMalloc1(pcbddc->benign_n,&zerodiag_subs);CHKERRQ(ierr); 2676 ierr = PetscObjectReference((PetscObject)zerodiag);CHKERRQ(ierr); 2677 zerodiag_subs[0] = zerodiag; 2678 } 2679 } 2680 if (checkb) { 2681 ierr = VecDestroyVecs(2,&work);CHKERRQ(ierr); 2682 } 2683 } 2684 ierr = PetscFree(interior_dofs);CHKERRQ(ierr); 2685 2686 if (!pcbddc->benign_n) { 2687 PetscInt n; 2688 2689 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2690 recompute_zerodiag = PETSC_FALSE; 2691 ierr = MatGetLocalSize(pcbddc->local_mat,&n,NULL);CHKERRQ(ierr); 2692 if (n) { 2693 has_null_pressures = PETSC_FALSE; 2694 have_null = PETSC_FALSE; 2695 } 2696 } 2697 2698 /* final check for null pressures */ 2699 if (zerodiag && pressures) { 2700 PetscInt nz,np; 2701 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2702 ierr = ISGetLocalSize(pressures,&np);CHKERRQ(ierr); 2703 if (nz != np) have_null = PETSC_FALSE; 2704 } 2705 2706 if (recompute_zerodiag) { 2707 ierr = ISDestroy(&zerodiag);CHKERRQ(ierr); 2708 if (pcbddc->benign_n == 1) { 2709 ierr = PetscObjectReference((PetscObject)zerodiag_subs[0]);CHKERRQ(ierr); 2710 zerodiag = zerodiag_subs[0]; 2711 } else { 2712 PetscInt i,nzn,*new_idxs; 2713 2714 nzn = 0; 2715 for (i=0;i<pcbddc->benign_n;i++) { 2716 PetscInt ns; 2717 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2718 nzn += ns; 2719 } 2720 ierr = PetscMalloc1(nzn,&new_idxs);CHKERRQ(ierr); 2721 nzn = 0; 2722 for (i=0;i<pcbddc->benign_n;i++) { 2723 PetscInt ns,*idxs; 2724 ierr = ISGetLocalSize(zerodiag_subs[i],&ns);CHKERRQ(ierr); 2725 ierr = ISGetIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2726 ierr = PetscMemcpy(new_idxs+nzn,idxs,ns*sizeof(PetscInt));CHKERRQ(ierr); 2727 ierr = ISRestoreIndices(zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2728 nzn += ns; 2729 } 2730 ierr = PetscSortInt(nzn,new_idxs);CHKERRQ(ierr); 2731 ierr = ISCreateGeneral(PETSC_COMM_SELF,nzn,new_idxs,PETSC_OWN_POINTER,&zerodiag);CHKERRQ(ierr); 2732 } 2733 have_null = PETSC_FALSE; 2734 } 2735 2736 /* Prepare matrix to compute no-net-flux */ 2737 if (pcbddc->compute_nonetflux && !pcbddc->divudotp) { 2738 Mat A,loc_divudotp; 2739 ISLocalToGlobalMapping rl2g,cl2g,l2gmap; 2740 IS row,col,isused = NULL; 2741 PetscInt M,N,n,st,n_isused; 2742 2743 if (pressures) { 2744 isused = pressures; 2745 } else { 2746 isused = zerodiag_save; 2747 } 2748 ierr = MatGetLocalToGlobalMapping(pc->pmat,&l2gmap,NULL);CHKERRQ(ierr); 2749 ierr = MatISGetLocalMat(pc->pmat,&A);CHKERRQ(ierr); 2750 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 2751 if (!isused && n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_USER,"Don't know how to extract div u dot p! Please provide the pressure field"); 2752 n_isused = 0; 2753 if (isused) { 2754 ierr = ISGetLocalSize(isused,&n_isused);CHKERRQ(ierr); 2755 } 2756 ierr = MPI_Scan(&n_isused,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2757 st = st-n_isused; 2758 if (n) { 2759 const PetscInt *gidxs; 2760 2761 ierr = MatCreateSubMatrix(A,isused,NULL,MAT_INITIAL_MATRIX,&loc_divudotp);CHKERRQ(ierr); 2762 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2763 /* TODO: extend ISCreateStride with st = PETSC_DECIDE */ 2764 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2765 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2766 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 2767 } else { 2768 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&loc_divudotp);CHKERRQ(ierr); 2769 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),n_isused,st,1,&row);CHKERRQ(ierr); 2770 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),0,NULL,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 2771 } 2772 ierr = MatGetSize(pc->pmat,NULL,&N);CHKERRQ(ierr); 2773 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 2774 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 2775 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 2776 ierr = ISDestroy(&row);CHKERRQ(ierr); 2777 ierr = ISDestroy(&col);CHKERRQ(ierr); 2778 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->divudotp);CHKERRQ(ierr); 2779 ierr = MatSetType(pcbddc->divudotp,MATIS);CHKERRQ(ierr); 2780 ierr = MatSetSizes(pcbddc->divudotp,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 2781 ierr = MatSetLocalToGlobalMapping(pcbddc->divudotp,rl2g,cl2g);CHKERRQ(ierr); 2782 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 2783 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 2784 ierr = MatISSetLocalMat(pcbddc->divudotp,loc_divudotp);CHKERRQ(ierr); 2785 ierr = MatDestroy(&loc_divudotp);CHKERRQ(ierr); 2786 ierr = MatAssemblyBegin(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2787 ierr = MatAssemblyEnd(pcbddc->divudotp,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2788 } 2789 ierr = ISDestroy(&zerodiag_save);CHKERRQ(ierr); 2790 2791 /* change of basis and p0 dofs */ 2792 if (has_null_pressures) { 2793 IS zerodiagc; 2794 const PetscInt *idxs,*idxsc; 2795 PetscInt i,s,*nnz; 2796 2797 ierr = ISGetLocalSize(zerodiag,&nz);CHKERRQ(ierr); 2798 ierr = ISComplement(zerodiag,0,n,&zerodiagc);CHKERRQ(ierr); 2799 ierr = ISGetIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2800 /* local change of basis for pressures */ 2801 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 2802 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_change);CHKERRQ(ierr); 2803 ierr = MatSetType(pcbddc->benign_change,MATAIJ);CHKERRQ(ierr); 2804 ierr = MatSetSizes(pcbddc->benign_change,n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2805 ierr = PetscMalloc1(n,&nnz);CHKERRQ(ierr); 2806 for (i=0;i<n-nz;i++) nnz[idxsc[i]] = 1; /* identity on velocities plus pressure dofs for non-singular subdomains */ 2807 for (i=0;i<pcbddc->benign_n;i++) { 2808 PetscInt nzs,j; 2809 2810 ierr = ISGetLocalSize(zerodiag_subs[i],&nzs);CHKERRQ(ierr); 2811 ierr = ISGetIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2812 for (j=0;j<nzs-1;j++) nnz[idxs[j]] = 2; /* change on pressures */ 2813 nnz[idxs[nzs-1]] = nzs; /* last local pressure dof in subdomain */ 2814 ierr = ISRestoreIndices(zerodiag_subs[i],&idxs);CHKERRQ(ierr); 2815 } 2816 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_change,0,nnz);CHKERRQ(ierr); 2817 ierr = PetscFree(nnz);CHKERRQ(ierr); 2818 /* set identity on velocities */ 2819 for (i=0;i<n-nz;i++) { 2820 ierr = MatSetValue(pcbddc->benign_change,idxsc[i],idxsc[i],1.,INSERT_VALUES);CHKERRQ(ierr); 2821 } 2822 ierr = ISRestoreIndices(zerodiagc,&idxsc);CHKERRQ(ierr); 2823 ierr = ISDestroy(&zerodiagc);CHKERRQ(ierr); 2824 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 2825 ierr = PetscMalloc3(pcbddc->benign_n,&pcbddc->benign_p0_lidx,pcbddc->benign_n,&pcbddc->benign_p0_gidx,pcbddc->benign_n,&pcbddc->benign_p0);CHKERRQ(ierr); 2826 /* set change on pressures */ 2827 for (s=0;s<pcbddc->benign_n;s++) { 2828 PetscScalar *array; 2829 PetscInt nzs; 2830 2831 ierr = ISGetLocalSize(zerodiag_subs[s],&nzs);CHKERRQ(ierr); 2832 ierr = ISGetIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2833 for (i=0;i<nzs-1;i++) { 2834 PetscScalar vals[2]; 2835 PetscInt cols[2]; 2836 2837 cols[0] = idxs[i]; 2838 cols[1] = idxs[nzs-1]; 2839 vals[0] = 1.; 2840 vals[1] = 1.; 2841 ierr = MatSetValues(pcbddc->benign_change,1,cols,2,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 2842 } 2843 ierr = PetscMalloc1(nzs,&array);CHKERRQ(ierr); 2844 for (i=0;i<nzs-1;i++) array[i] = -1.; 2845 array[nzs-1] = 1.; 2846 ierr = MatSetValues(pcbddc->benign_change,1,idxs+nzs-1,nzs,idxs,array,INSERT_VALUES);CHKERRQ(ierr); 2847 /* store local idxs for p0 */ 2848 pcbddc->benign_p0_lidx[s] = idxs[nzs-1]; 2849 ierr = ISRestoreIndices(zerodiag_subs[s],&idxs);CHKERRQ(ierr); 2850 ierr = PetscFree(array);CHKERRQ(ierr); 2851 } 2852 ierr = MatAssemblyBegin(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2853 ierr = MatAssemblyEnd(pcbddc->benign_change,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2854 /* project if needed */ 2855 if (pcbddc->benign_change_explicit) { 2856 Mat M; 2857 2858 ierr = MatPtAP(pcbddc->local_mat,pcbddc->benign_change,MAT_INITIAL_MATRIX,2.0,&M);CHKERRQ(ierr); 2859 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 2860 ierr = MatSeqAIJCompress(M,&pcbddc->local_mat);CHKERRQ(ierr); 2861 ierr = MatDestroy(&M);CHKERRQ(ierr); 2862 } 2863 /* store global idxs for p0 */ 2864 ierr = ISLocalToGlobalMappingApply(pc->pmat->rmap->mapping,pcbddc->benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2865 } 2866 pcbddc->benign_zerodiag_subs = zerodiag_subs; 2867 ierr = ISDestroy(&pressures);CHKERRQ(ierr); 2868 2869 /* determines if the coarse solver will be singular or not */ 2870 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_null,1,MPIU_BOOL,MPI_LAND,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2871 /* determines if the problem has subdomains with 0 pressure block */ 2872 ierr = MPI_Allreduce(&have_null,&pcbddc->benign_have_null,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 2873 *zerodiaglocal = zerodiag; 2874 PetscFunctionReturn(0); 2875 } 2876 2877 PetscErrorCode PCBDDCBenignGetOrSetP0(PC pc, Vec v, PetscBool get) 2878 { 2879 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2880 PetscScalar *array; 2881 PetscErrorCode ierr; 2882 2883 PetscFunctionBegin; 2884 if (!pcbddc->benign_sf) { 2885 ierr = PetscSFCreate(PetscObjectComm((PetscObject)pc),&pcbddc->benign_sf);CHKERRQ(ierr); 2886 ierr = PetscSFSetGraphLayout(pcbddc->benign_sf,pc->pmat->rmap,pcbddc->benign_n,NULL,PETSC_OWN_POINTER,pcbddc->benign_p0_gidx);CHKERRQ(ierr); 2887 } 2888 if (get) { 2889 ierr = VecGetArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2890 ierr = PetscSFBcastBegin(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2891 ierr = PetscSFBcastEnd(pcbddc->benign_sf,MPIU_SCALAR,array,pcbddc->benign_p0);CHKERRQ(ierr); 2892 ierr = VecRestoreArrayRead(v,(const PetscScalar**)&array);CHKERRQ(ierr); 2893 } else { 2894 ierr = VecGetArray(v,&array);CHKERRQ(ierr); 2895 ierr = PetscSFReduceBegin(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2896 ierr = PetscSFReduceEnd(pcbddc->benign_sf,MPIU_SCALAR,pcbddc->benign_p0,array,MPIU_REPLACE);CHKERRQ(ierr); 2897 ierr = VecRestoreArray(v,&array);CHKERRQ(ierr); 2898 } 2899 PetscFunctionReturn(0); 2900 } 2901 2902 PetscErrorCode PCBDDCBenignPopOrPushB0(PC pc, PetscBool pop) 2903 { 2904 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 2905 PetscErrorCode ierr; 2906 2907 PetscFunctionBegin; 2908 /* TODO: add error checking 2909 - avoid nested pop (or push) calls. 2910 - cannot push before pop. 2911 - cannot call this if pcbddc->local_mat is NULL 2912 */ 2913 if (!pcbddc->benign_n) { 2914 PetscFunctionReturn(0); 2915 } 2916 if (pop) { 2917 if (pcbddc->benign_change_explicit) { 2918 IS is_p0; 2919 MatReuse reuse; 2920 2921 /* extract B_0 */ 2922 reuse = MAT_INITIAL_MATRIX; 2923 if (pcbddc->benign_B0) { 2924 reuse = MAT_REUSE_MATRIX; 2925 } 2926 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->benign_n,pcbddc->benign_p0_lidx,PETSC_COPY_VALUES,&is_p0);CHKERRQ(ierr); 2927 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_p0,NULL,reuse,&pcbddc->benign_B0);CHKERRQ(ierr); 2928 /* remove rows and cols from local problem */ 2929 ierr = MatSetOption(pcbddc->local_mat,MAT_KEEP_NONZERO_PATTERN,PETSC_TRUE);CHKERRQ(ierr); 2930 ierr = MatSetOption(pcbddc->local_mat,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 2931 ierr = MatZeroRowsColumnsIS(pcbddc->local_mat,is_p0,1.0,NULL,NULL);CHKERRQ(ierr); 2932 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 2933 } else { 2934 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 2935 PetscScalar *vals; 2936 PetscInt i,n,*idxs_ins; 2937 2938 ierr = VecGetLocalSize(matis->y,&n);CHKERRQ(ierr); 2939 ierr = PetscMalloc2(n,&idxs_ins,n,&vals);CHKERRQ(ierr); 2940 if (!pcbddc->benign_B0) { 2941 PetscInt *nnz; 2942 ierr = MatCreate(PetscObjectComm((PetscObject)pcbddc->local_mat),&pcbddc->benign_B0);CHKERRQ(ierr); 2943 ierr = MatSetType(pcbddc->benign_B0,MATAIJ);CHKERRQ(ierr); 2944 ierr = MatSetSizes(pcbddc->benign_B0,pcbddc->benign_n,n,PETSC_DECIDE,PETSC_DECIDE);CHKERRQ(ierr); 2945 ierr = PetscMalloc1(pcbddc->benign_n,&nnz);CHKERRQ(ierr); 2946 for (i=0;i<pcbddc->benign_n;i++) { 2947 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nnz[i]);CHKERRQ(ierr); 2948 nnz[i] = n - nnz[i]; 2949 } 2950 ierr = MatSeqAIJSetPreallocation(pcbddc->benign_B0,0,nnz);CHKERRQ(ierr); 2951 ierr = PetscFree(nnz);CHKERRQ(ierr); 2952 } 2953 2954 for (i=0;i<pcbddc->benign_n;i++) { 2955 PetscScalar *array; 2956 PetscInt *idxs,j,nz,cum; 2957 2958 ierr = VecSet(matis->x,0.);CHKERRQ(ierr); 2959 ierr = ISGetLocalSize(pcbddc->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 2960 ierr = ISGetIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2961 for (j=0;j<nz;j++) vals[j] = 1.; 2962 ierr = VecSetValues(matis->x,nz,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 2963 ierr = VecAssemblyBegin(matis->x);CHKERRQ(ierr); 2964 ierr = VecAssemblyEnd(matis->x);CHKERRQ(ierr); 2965 ierr = VecSet(matis->y,0.);CHKERRQ(ierr); 2966 ierr = MatMult(matis->A,matis->x,matis->y);CHKERRQ(ierr); 2967 ierr = VecGetArray(matis->y,&array);CHKERRQ(ierr); 2968 cum = 0; 2969 for (j=0;j<n;j++) { 2970 if (PetscUnlikely(PetscAbsScalar(array[j]) > PETSC_SMALL)) { 2971 vals[cum] = array[j]; 2972 idxs_ins[cum] = j; 2973 cum++; 2974 } 2975 } 2976 ierr = MatSetValues(pcbddc->benign_B0,1,&i,cum,idxs_ins,vals,INSERT_VALUES);CHKERRQ(ierr); 2977 ierr = VecRestoreArray(matis->y,&array);CHKERRQ(ierr); 2978 ierr = ISRestoreIndices(pcbddc->benign_zerodiag_subs[i],(const PetscInt**)&idxs);CHKERRQ(ierr); 2979 } 2980 ierr = MatAssemblyBegin(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2981 ierr = MatAssemblyEnd(pcbddc->benign_B0,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2982 ierr = PetscFree2(idxs_ins,vals);CHKERRQ(ierr); 2983 } 2984 } else { /* push */ 2985 if (pcbddc->benign_change_explicit) { 2986 PetscInt i; 2987 2988 for (i=0;i<pcbddc->benign_n;i++) { 2989 PetscScalar *B0_vals; 2990 PetscInt *B0_cols,B0_ncol; 2991 2992 ierr = MatGetRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2993 ierr = MatSetValues(pcbddc->local_mat,1,pcbddc->benign_p0_lidx+i,B0_ncol,B0_cols,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2994 ierr = MatSetValues(pcbddc->local_mat,B0_ncol,B0_cols,1,pcbddc->benign_p0_lidx+i,B0_vals,INSERT_VALUES);CHKERRQ(ierr); 2995 ierr = MatSetValue(pcbddc->local_mat,pcbddc->benign_p0_lidx[i],pcbddc->benign_p0_lidx[i],0.0,INSERT_VALUES);CHKERRQ(ierr); 2996 ierr = MatRestoreRow(pcbddc->benign_B0,i,&B0_ncol,(const PetscInt**)&B0_cols,(const PetscScalar**)&B0_vals);CHKERRQ(ierr); 2997 } 2998 ierr = MatAssemblyBegin(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 2999 ierr = MatAssemblyEnd(pcbddc->local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 3000 } else { 3001 SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Cannot push B0!\n"); 3002 } 3003 } 3004 PetscFunctionReturn(0); 3005 } 3006 3007 PetscErrorCode PCBDDCAdaptiveSelection(PC pc) 3008 { 3009 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3010 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3011 PetscBLASInt B_dummyint,B_neigs,B_ierr,B_lwork; 3012 PetscBLASInt *B_iwork,*B_ifail; 3013 PetscScalar *work,lwork; 3014 PetscScalar *St,*S,*eigv; 3015 PetscScalar *Sarray,*Starray; 3016 PetscReal *eigs,thresh; 3017 PetscInt i,nmax,nmin,nv,cum,mss,cum2,cumarray,maxneigs; 3018 PetscBool allocated_S_St; 3019 #if defined(PETSC_USE_COMPLEX) 3020 PetscReal *rwork; 3021 #endif 3022 PetscErrorCode ierr; 3023 3024 PetscFunctionBegin; 3025 if (!sub_schurs) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Adaptive selection of constraints requires SubSchurs data"); 3026 if (!sub_schurs->schur_explicit) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Adaptive selection of constraints requires MUMPS and/or MKL_CPARDISO"); 3027 if (sub_schurs->n_subs && (!sub_schurs->is_hermitian || !sub_schurs->is_posdef)) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_SUP,"Adaptive selection not yet implemented for general matrix pencils (herm %d, posdef %d)\nRerun with -sub_schurs_hermitian 1 -sub_schurs_posdef 1 if the problem is SPD",sub_schurs->is_hermitian,sub_schurs->is_posdef); 3028 3029 if (pcbddc->dbg_flag) { 3030 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3031 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 3032 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check adaptive selection of constraints\n");CHKERRQ(ierr); 3033 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 3034 } 3035 3036 if (pcbddc->dbg_flag) { 3037 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d cc %d (%d,%d).\n",PetscGlobalRank,sub_schurs->n_subs,sub_schurs->is_hermitian,sub_schurs->is_posdef); 3038 } 3039 3040 /* max size of subsets */ 3041 mss = 0; 3042 for (i=0;i<sub_schurs->n_subs;i++) { 3043 PetscInt subset_size; 3044 3045 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3046 mss = PetscMax(mss,subset_size); 3047 } 3048 3049 /* min/max and threshold */ 3050 nmax = pcbddc->adaptive_nmax > 0 ? pcbddc->adaptive_nmax : mss; 3051 nmin = pcbddc->adaptive_nmin > 0 ? pcbddc->adaptive_nmin : 0; 3052 nmax = PetscMax(nmin,nmax); 3053 allocated_S_St = PETSC_FALSE; 3054 if (nmin) { 3055 allocated_S_St = PETSC_TRUE; 3056 } 3057 3058 /* allocate lapack workspace */ 3059 cum = cum2 = 0; 3060 maxneigs = 0; 3061 for (i=0;i<sub_schurs->n_subs;i++) { 3062 PetscInt n,subset_size; 3063 3064 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3065 n = PetscMin(subset_size,nmax); 3066 cum += subset_size; 3067 cum2 += subset_size*n; 3068 maxneigs = PetscMax(maxneigs,n); 3069 } 3070 if (mss) { 3071 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3072 PetscBLASInt B_itype = 1; 3073 PetscBLASInt B_N = mss; 3074 PetscReal zero = 0.0; 3075 PetscReal eps = 0.0; /* dlamch? */ 3076 3077 B_lwork = -1; 3078 S = NULL; 3079 St = NULL; 3080 eigs = NULL; 3081 eigv = NULL; 3082 B_iwork = NULL; 3083 B_ifail = NULL; 3084 #if defined(PETSC_USE_COMPLEX) 3085 rwork = NULL; 3086 #endif 3087 thresh = 1.0; 3088 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3089 #if defined(PETSC_USE_COMPLEX) 3090 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3091 #else 3092 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&zero,&thresh,&B_dummyint,&B_dummyint,&eps,&B_neigs,eigs,eigv,&B_N,&lwork,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3093 #endif 3094 if (B_ierr != 0) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYGVX Lapack routine %d",(int)B_ierr); 3095 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3096 } else { 3097 /* TODO */ 3098 } 3099 } else { 3100 lwork = 0; 3101 } 3102 3103 nv = 0; 3104 if (sub_schurs->is_vertices && pcbddc->use_vertices) { /* complement set of active subsets, each entry is a vertex (boundary made by active subsets, vertices and dirichlet dofs) */ 3105 ierr = ISGetLocalSize(sub_schurs->is_vertices,&nv);CHKERRQ(ierr); 3106 } 3107 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lwork),&B_lwork);CHKERRQ(ierr); 3108 if (allocated_S_St) { 3109 ierr = PetscMalloc2(mss*mss,&S,mss*mss,&St);CHKERRQ(ierr); 3110 } 3111 ierr = PetscMalloc5(mss*mss,&eigv,mss,&eigs,B_lwork,&work,5*mss,&B_iwork,mss,&B_ifail);CHKERRQ(ierr); 3112 #if defined(PETSC_USE_COMPLEX) 3113 ierr = PetscMalloc1(7*mss,&rwork);CHKERRQ(ierr); 3114 #endif 3115 ierr = PetscMalloc5(nv+sub_schurs->n_subs,&pcbddc->adaptive_constraints_n, 3116 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_idxs_ptr, 3117 nv+sub_schurs->n_subs+1,&pcbddc->adaptive_constraints_data_ptr, 3118 nv+cum,&pcbddc->adaptive_constraints_idxs, 3119 nv+cum2,&pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 3120 ierr = PetscMemzero(pcbddc->adaptive_constraints_n,(nv+sub_schurs->n_subs)*sizeof(PetscInt));CHKERRQ(ierr); 3121 3122 maxneigs = 0; 3123 cum = cumarray = 0; 3124 pcbddc->adaptive_constraints_idxs_ptr[0] = 0; 3125 pcbddc->adaptive_constraints_data_ptr[0] = 0; 3126 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 3127 const PetscInt *idxs; 3128 3129 ierr = ISGetIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3130 for (cum=0;cum<nv;cum++) { 3131 pcbddc->adaptive_constraints_n[cum] = 1; 3132 pcbddc->adaptive_constraints_idxs[cum] = idxs[cum]; 3133 pcbddc->adaptive_constraints_data[cum] = 1.0; 3134 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum]+1; 3135 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum]+1; 3136 } 3137 ierr = ISRestoreIndices(sub_schurs->is_vertices,&idxs);CHKERRQ(ierr); 3138 } 3139 3140 if (mss) { /* multilevel */ 3141 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3142 ierr = MatSeqAIJGetArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3143 } 3144 3145 thresh = pcbddc->adaptive_threshold; 3146 for (i=0;i<sub_schurs->n_subs;i++) { 3147 const PetscInt *idxs; 3148 PetscReal upper,lower; 3149 PetscInt j,subset_size,eigs_start = 0; 3150 PetscBLASInt B_N; 3151 PetscBool same_data = PETSC_FALSE; 3152 3153 if (pcbddc->use_deluxe_scaling) { 3154 upper = PETSC_MAX_REAL; 3155 lower = thresh; 3156 } else { 3157 upper = 1./thresh; 3158 lower = 0.; 3159 } 3160 ierr = ISGetLocalSize(sub_schurs->is_subs[i],&subset_size);CHKERRQ(ierr); 3161 ierr = ISGetIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3162 ierr = PetscBLASIntCast(subset_size,&B_N);CHKERRQ(ierr); 3163 if (allocated_S_St) { /* S and S_t should be copied since we could need them later */ 3164 if (sub_schurs->is_hermitian) { 3165 PetscInt j,k; 3166 if (sub_schurs->n_subs == 1) { /* zeroing memory to use PetscMemcmp later */ 3167 ierr = PetscMemzero(S,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3168 ierr = PetscMemzero(St,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3169 } 3170 for (j=0;j<subset_size;j++) { 3171 for (k=j;k<subset_size;k++) { 3172 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3173 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3174 } 3175 } 3176 } else { 3177 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3178 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3179 } 3180 } else { 3181 S = Sarray + cumarray; 3182 St = Starray + cumarray; 3183 } 3184 /* see if we can save some work */ 3185 if (sub_schurs->n_subs == 1 && pcbddc->use_deluxe_scaling) { 3186 ierr = PetscMemcmp(S,St,subset_size*subset_size*sizeof(PetscScalar),&same_data);CHKERRQ(ierr); 3187 } 3188 3189 if (same_data && !sub_schurs->change) { /* there's no need of constraints here */ 3190 B_neigs = 0; 3191 } else { 3192 if (sub_schurs->is_hermitian && sub_schurs->is_posdef) { 3193 PetscBLASInt B_itype = 1; 3194 PetscBLASInt B_IL, B_IU; 3195 PetscReal eps = -1.0; /* dlamch? */ 3196 PetscInt nmin_s; 3197 PetscBool compute_range = PETSC_FALSE; 3198 3199 if (pcbddc->dbg_flag) { 3200 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Computing for sub %d/%d size %d count %d fid %d.\n",i,sub_schurs->n_subs,subset_size,pcbddc->mat_graph->count[idxs[0]]+1,pcbddc->mat_graph->which_dof[idxs[0]]); 3201 } 3202 3203 compute_range = PETSC_FALSE; 3204 if (thresh > 1.+PETSC_SMALL && !same_data) { 3205 compute_range = PETSC_TRUE; 3206 } 3207 3208 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3209 if (compute_range) { 3210 3211 /* ask for eigenvalues larger than thresh */ 3212 #if defined(PETSC_USE_COMPLEX) 3213 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3214 #else 3215 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","V","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3216 #endif 3217 } else if (!same_data) { 3218 B_IU = PetscMax(1,PetscMin(B_N,nmax)); 3219 B_IL = 1; 3220 #if defined(PETSC_USE_COMPLEX) 3221 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3222 #else 3223 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs,eigs,eigv,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3224 #endif 3225 } else { /* same_data is true, so just get the adaptive functional requested by the user */ 3226 PetscInt k; 3227 if (!sub_schurs->change_primal_sub) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 3228 ierr = ISGetLocalSize(sub_schurs->change_primal_sub[i],&nmax);CHKERRQ(ierr); 3229 ierr = PetscBLASIntCast(nmax,&B_neigs);CHKERRQ(ierr); 3230 nmin = nmax; 3231 ierr = PetscMemzero(eigv,subset_size*nmax*sizeof(PetscScalar));CHKERRQ(ierr); 3232 for (k=0;k<nmax;k++) { 3233 eigs[k] = 1./PETSC_SMALL; 3234 eigv[k*(subset_size+1)] = 1.0; 3235 } 3236 } 3237 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3238 if (B_ierr) { 3239 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3240 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3241 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3242 } 3243 3244 if (B_neigs > nmax) { 3245 if (pcbddc->dbg_flag) { 3246 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, more than maximum required %d.\n",B_neigs,nmax); 3247 } 3248 if (pcbddc->use_deluxe_scaling) eigs_start = B_neigs -nmax; 3249 B_neigs = nmax; 3250 } 3251 3252 nmin_s = PetscMin(nmin,B_N); 3253 if (B_neigs < nmin_s) { 3254 PetscBLASInt B_neigs2; 3255 3256 if (pcbddc->use_deluxe_scaling) { 3257 B_IL = B_N - nmin_s + 1; 3258 B_IU = B_N - B_neigs; 3259 } else { 3260 B_IL = B_neigs + 1; 3261 B_IU = nmin_s; 3262 } 3263 if (pcbddc->dbg_flag) { 3264 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," found %d eigs, less than minimum required %d. Asking for %d to %d incl (fortran like)\n",B_neigs,nmin,B_IL,B_IU); 3265 } 3266 if (sub_schurs->is_hermitian) { 3267 PetscInt j,k; 3268 for (j=0;j<subset_size;j++) { 3269 for (k=j;k<subset_size;k++) { 3270 S [j*subset_size+k] = Sarray [cumarray+j*subset_size+k]; 3271 St[j*subset_size+k] = Starray[cumarray+j*subset_size+k]; 3272 } 3273 } 3274 } else { 3275 ierr = PetscMemcpy(S,Sarray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3276 ierr = PetscMemcpy(St,Starray+cumarray,subset_size*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3277 } 3278 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 3279 #if defined(PETSC_USE_COMPLEX) 3280 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,rwork,B_iwork,B_ifail,&B_ierr)); 3281 #else 3282 PetscStackCallBLAS("LAPACKsygvx",LAPACKsygvx_(&B_itype,"V","I","L",&B_N,St,&B_N,S,&B_N,&lower,&upper,&B_IL,&B_IU,&eps,&B_neigs2,eigs+B_neigs,eigv+B_neigs*subset_size,&B_N,work,&B_lwork,B_iwork,B_ifail,&B_ierr)); 3283 #endif 3284 ierr = PetscFPTrapPop();CHKERRQ(ierr); 3285 B_neigs += B_neigs2; 3286 } 3287 if (B_ierr) { 3288 if (B_ierr < 0 ) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: illegal value for argument %d",-(int)B_ierr); 3289 else if (B_ierr <= B_N) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: %d eigenvalues failed to converge",(int)B_ierr); 3290 else SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYGVX Lapack routine: leading minor of order %d is not positive definite",(int)B_ierr-B_N-1); 3291 } 3292 if (pcbddc->dbg_flag) { 3293 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Got %d eigs\n",B_neigs);CHKERRQ(ierr); 3294 for (j=0;j<B_neigs;j++) { 3295 if (eigs[j] == 0.0) { 3296 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," Inf\n");CHKERRQ(ierr); 3297 } else { 3298 if (pcbddc->use_deluxe_scaling) { 3299 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",eigs[j+eigs_start]);CHKERRQ(ierr); 3300 } else { 3301 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.6e\n",1./eigs[j+eigs_start]);CHKERRQ(ierr); 3302 } 3303 } 3304 } 3305 } 3306 } else { 3307 /* TODO */ 3308 } 3309 } 3310 /* change the basis back to the original one */ 3311 if (sub_schurs->change) { 3312 Mat change,phi,phit; 3313 3314 if (pcbddc->dbg_flag > 2) { 3315 PetscInt ii; 3316 for (ii=0;ii<B_neigs;ii++) { 3317 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector (old basis) %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3318 for (j=0;j<B_N;j++) { 3319 #if defined(PETSC_USE_COMPLEX) 3320 PetscReal r = PetscRealPart(eigv[(ii+eigs_start)*subset_size+j]); 3321 PetscReal c = PetscImaginaryPart(eigv[(ii+eigs_start)*subset_size+j]); 3322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3323 #else 3324 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",eigv[(ii+eigs_start)*subset_size+j]);CHKERRQ(ierr); 3325 #endif 3326 } 3327 } 3328 } 3329 ierr = KSPGetOperators(sub_schurs->change[i],&change,NULL);CHKERRQ(ierr); 3330 ierr = MatCreateSeqDense(PETSC_COMM_SELF,subset_size,B_neigs,eigv+eigs_start*subset_size,&phit);CHKERRQ(ierr); 3331 ierr = MatMatMult(change,phit,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&phi);CHKERRQ(ierr); 3332 ierr = MatCopy(phi,phit,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 3333 ierr = MatDestroy(&phit);CHKERRQ(ierr); 3334 ierr = MatDestroy(&phi);CHKERRQ(ierr); 3335 } 3336 maxneigs = PetscMax(B_neigs,maxneigs); 3337 pcbddc->adaptive_constraints_n[i+nv] = B_neigs; 3338 if (B_neigs) { 3339 ierr = PetscMemcpy(pcbddc->adaptive_constraints_data+pcbddc->adaptive_constraints_data_ptr[cum],eigv+eigs_start*subset_size,B_neigs*subset_size*sizeof(PetscScalar));CHKERRQ(ierr); 3340 3341 if (pcbddc->dbg_flag > 1) { 3342 PetscInt ii; 3343 for (ii=0;ii<B_neigs;ii++) { 3344 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," -> Eigenvector %d/%d (%d)\n",ii,B_neigs,B_N);CHKERRQ(ierr); 3345 for (j=0;j<B_N;j++) { 3346 #if defined(PETSC_USE_COMPLEX) 3347 PetscReal r = PetscRealPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3348 PetscReal c = PetscImaginaryPart(pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]); 3349 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e + %1.4e i\n",r,c);CHKERRQ(ierr); 3350 #else 3351 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer," %1.4e\n",pcbddc->adaptive_constraints_data[ii*subset_size+j+pcbddc->adaptive_constraints_data_ptr[cum]]);CHKERRQ(ierr); 3352 #endif 3353 } 3354 } 3355 } 3356 ierr = PetscMemcpy(pcbddc->adaptive_constraints_idxs+pcbddc->adaptive_constraints_idxs_ptr[cum],idxs,subset_size*sizeof(PetscInt));CHKERRQ(ierr); 3357 pcbddc->adaptive_constraints_idxs_ptr[cum+1] = pcbddc->adaptive_constraints_idxs_ptr[cum] + subset_size; 3358 pcbddc->adaptive_constraints_data_ptr[cum+1] = pcbddc->adaptive_constraints_data_ptr[cum] + subset_size*B_neigs; 3359 cum++; 3360 } 3361 ierr = ISRestoreIndices(sub_schurs->is_subs[i],&idxs);CHKERRQ(ierr); 3362 /* shift for next computation */ 3363 cumarray += subset_size*subset_size; 3364 } 3365 if (pcbddc->dbg_flag) { 3366 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 3367 } 3368 3369 if (mss) { 3370 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_inv_all,&Sarray);CHKERRQ(ierr); 3371 ierr = MatSeqAIJRestoreArray(sub_schurs->sum_S_Ej_tilda_all,&Starray);CHKERRQ(ierr); 3372 /* destroy matrices (junk) */ 3373 ierr = MatDestroy(&sub_schurs->sum_S_Ej_inv_all);CHKERRQ(ierr); 3374 ierr = MatDestroy(&sub_schurs->sum_S_Ej_tilda_all);CHKERRQ(ierr); 3375 } 3376 if (allocated_S_St) { 3377 ierr = PetscFree2(S,St);CHKERRQ(ierr); 3378 } 3379 ierr = PetscFree5(eigv,eigs,work,B_iwork,B_ifail);CHKERRQ(ierr); 3380 #if defined(PETSC_USE_COMPLEX) 3381 ierr = PetscFree(rwork);CHKERRQ(ierr); 3382 #endif 3383 if (pcbddc->dbg_flag) { 3384 PetscInt maxneigs_r; 3385 ierr = MPIU_Allreduce(&maxneigs,&maxneigs_r,1,MPIU_INT,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 3386 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of constraints per cc %d\n",maxneigs_r);CHKERRQ(ierr); 3387 } 3388 PetscFunctionReturn(0); 3389 } 3390 3391 PetscErrorCode PCBDDCSetUpSolvers(PC pc) 3392 { 3393 PetscScalar *coarse_submat_vals; 3394 PetscErrorCode ierr; 3395 3396 PetscFunctionBegin; 3397 /* Setup local scatters R_to_B and (optionally) R_to_D */ 3398 /* PCBDDCSetUpLocalWorkVectors should be called first! */ 3399 ierr = PCBDDCSetUpLocalScatters(pc);CHKERRQ(ierr); 3400 3401 /* Setup local neumann solver ksp_R */ 3402 /* PCBDDCSetUpLocalScatters should be called first! */ 3403 ierr = PCBDDCSetUpLocalSolvers(pc,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 3404 3405 /* 3406 Setup local correction and local part of coarse basis. 3407 Gives back the dense local part of the coarse matrix in column major ordering 3408 */ 3409 ierr = PCBDDCSetUpCorrection(pc,&coarse_submat_vals);CHKERRQ(ierr); 3410 3411 /* Compute total number of coarse nodes and setup coarse solver */ 3412 ierr = PCBDDCSetUpCoarseSolver(pc,coarse_submat_vals);CHKERRQ(ierr); 3413 3414 /* free */ 3415 ierr = PetscFree(coarse_submat_vals);CHKERRQ(ierr); 3416 PetscFunctionReturn(0); 3417 } 3418 3419 PetscErrorCode PCBDDCResetCustomization(PC pc) 3420 { 3421 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3422 PetscErrorCode ierr; 3423 3424 PetscFunctionBegin; 3425 ierr = ISDestroy(&pcbddc->user_primal_vertices);CHKERRQ(ierr); 3426 ierr = ISDestroy(&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 3427 ierr = ISDestroy(&pcbddc->NeumannBoundaries);CHKERRQ(ierr); 3428 ierr = ISDestroy(&pcbddc->NeumannBoundariesLocal);CHKERRQ(ierr); 3429 ierr = ISDestroy(&pcbddc->DirichletBoundaries);CHKERRQ(ierr); 3430 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 3431 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 3432 ierr = ISDestroy(&pcbddc->DirichletBoundariesLocal);CHKERRQ(ierr); 3433 ierr = PCBDDCSetDofsSplitting(pc,0,NULL);CHKERRQ(ierr); 3434 ierr = PCBDDCSetDofsSplittingLocal(pc,0,NULL);CHKERRQ(ierr); 3435 PetscFunctionReturn(0); 3436 } 3437 3438 PetscErrorCode PCBDDCResetTopography(PC pc) 3439 { 3440 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3441 PetscInt i; 3442 PetscErrorCode ierr; 3443 3444 PetscFunctionBegin; 3445 ierr = MatDestroy(&pcbddc->nedcG);CHKERRQ(ierr); 3446 ierr = ISDestroy(&pcbddc->nedclocal);CHKERRQ(ierr); 3447 ierr = MatDestroy(&pcbddc->discretegradient);CHKERRQ(ierr); 3448 ierr = MatDestroy(&pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 3449 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 3450 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 3451 ierr = VecDestroy(&pcbddc->work_change);CHKERRQ(ierr); 3452 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 3453 ierr = MatDestroy(&pcbddc->divudotp);CHKERRQ(ierr); 3454 ierr = ISDestroy(&pcbddc->divudotp_vl2l);CHKERRQ(ierr); 3455 ierr = PCBDDCGraphDestroy(&pcbddc->mat_graph);CHKERRQ(ierr); 3456 for (i=0;i<pcbddc->n_local_subs;i++) { 3457 ierr = ISDestroy(&pcbddc->local_subs[i]);CHKERRQ(ierr); 3458 } 3459 pcbddc->n_local_subs = 0; 3460 ierr = PetscFree(pcbddc->local_subs);CHKERRQ(ierr); 3461 ierr = PCBDDCSubSchursDestroy(&pcbddc->sub_schurs);CHKERRQ(ierr); 3462 pcbddc->graphanalyzed = PETSC_FALSE; 3463 pcbddc->recompute_topography = PETSC_TRUE; 3464 PetscFunctionReturn(0); 3465 } 3466 3467 PetscErrorCode PCBDDCResetSolvers(PC pc) 3468 { 3469 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3470 PetscErrorCode ierr; 3471 3472 PetscFunctionBegin; 3473 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 3474 if (pcbddc->coarse_phi_B) { 3475 PetscScalar *array; 3476 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&array);CHKERRQ(ierr); 3477 ierr = PetscFree(array);CHKERRQ(ierr); 3478 } 3479 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3480 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3481 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3482 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3483 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3484 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3485 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3486 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3487 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3488 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3489 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 3490 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 3491 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 3492 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 3493 ierr = KSPReset(pcbddc->ksp_D);CHKERRQ(ierr); 3494 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 3495 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 3496 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 3497 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 3498 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 3499 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 3500 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 3501 ierr = MatDestroy(&pcbddc->benign_change);CHKERRQ(ierr); 3502 ierr = VecDestroy(&pcbddc->benign_vec);CHKERRQ(ierr); 3503 ierr = PCBDDCBenignShellMat(pc,PETSC_TRUE);CHKERRQ(ierr); 3504 ierr = MatDestroy(&pcbddc->benign_B0);CHKERRQ(ierr); 3505 ierr = PetscSFDestroy(&pcbddc->benign_sf);CHKERRQ(ierr); 3506 if (pcbddc->benign_zerodiag_subs) { 3507 PetscInt i; 3508 for (i=0;i<pcbddc->benign_n;i++) { 3509 ierr = ISDestroy(&pcbddc->benign_zerodiag_subs[i]);CHKERRQ(ierr); 3510 } 3511 ierr = PetscFree(pcbddc->benign_zerodiag_subs);CHKERRQ(ierr); 3512 } 3513 ierr = PetscFree3(pcbddc->benign_p0_lidx,pcbddc->benign_p0_gidx,pcbddc->benign_p0);CHKERRQ(ierr); 3514 PetscFunctionReturn(0); 3515 } 3516 3517 PetscErrorCode PCBDDCSetUpLocalWorkVectors(PC pc) 3518 { 3519 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 3520 PC_IS *pcis = (PC_IS*)pc->data; 3521 VecType impVecType; 3522 PetscInt n_constraints,n_R,old_size; 3523 PetscErrorCode ierr; 3524 3525 PetscFunctionBegin; 3526 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - pcbddc->n_vertices; 3527 n_R = pcis->n - pcbddc->n_vertices; 3528 ierr = VecGetType(pcis->vec1_N,&impVecType);CHKERRQ(ierr); 3529 /* local work vectors (try to avoid unneeded work)*/ 3530 /* R nodes */ 3531 old_size = -1; 3532 if (pcbddc->vec1_R) { 3533 ierr = VecGetSize(pcbddc->vec1_R,&old_size);CHKERRQ(ierr); 3534 } 3535 if (n_R != old_size) { 3536 ierr = VecDestroy(&pcbddc->vec1_R);CHKERRQ(ierr); 3537 ierr = VecDestroy(&pcbddc->vec2_R);CHKERRQ(ierr); 3538 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_R);CHKERRQ(ierr); 3539 ierr = VecSetSizes(pcbddc->vec1_R,PETSC_DECIDE,n_R);CHKERRQ(ierr); 3540 ierr = VecSetType(pcbddc->vec1_R,impVecType);CHKERRQ(ierr); 3541 ierr = VecDuplicate(pcbddc->vec1_R,&pcbddc->vec2_R);CHKERRQ(ierr); 3542 } 3543 /* local primal dofs */ 3544 old_size = -1; 3545 if (pcbddc->vec1_P) { 3546 ierr = VecGetSize(pcbddc->vec1_P,&old_size);CHKERRQ(ierr); 3547 } 3548 if (pcbddc->local_primal_size != old_size) { 3549 ierr = VecDestroy(&pcbddc->vec1_P);CHKERRQ(ierr); 3550 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_P);CHKERRQ(ierr); 3551 ierr = VecSetSizes(pcbddc->vec1_P,PETSC_DECIDE,pcbddc->local_primal_size);CHKERRQ(ierr); 3552 ierr = VecSetType(pcbddc->vec1_P,impVecType);CHKERRQ(ierr); 3553 } 3554 /* local explicit constraints */ 3555 old_size = -1; 3556 if (pcbddc->vec1_C) { 3557 ierr = VecGetSize(pcbddc->vec1_C,&old_size);CHKERRQ(ierr); 3558 } 3559 if (n_constraints && n_constraints != old_size) { 3560 ierr = VecDestroy(&pcbddc->vec1_C);CHKERRQ(ierr); 3561 ierr = VecCreate(PetscObjectComm((PetscObject)pcis->vec1_N),&pcbddc->vec1_C);CHKERRQ(ierr); 3562 ierr = VecSetSizes(pcbddc->vec1_C,PETSC_DECIDE,n_constraints);CHKERRQ(ierr); 3563 ierr = VecSetType(pcbddc->vec1_C,impVecType);CHKERRQ(ierr); 3564 } 3565 PetscFunctionReturn(0); 3566 } 3567 3568 PetscErrorCode PCBDDCSetUpCorrection(PC pc, PetscScalar **coarse_submat_vals_n) 3569 { 3570 PetscErrorCode ierr; 3571 /* pointers to pcis and pcbddc */ 3572 PC_IS* pcis = (PC_IS*)pc->data; 3573 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 3574 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 3575 /* submatrices of local problem */ 3576 Mat A_RV,A_VR,A_VV,local_auxmat2_R; 3577 /* submatrices of local coarse problem */ 3578 Mat S_VV,S_CV,S_VC,S_CC; 3579 /* working matrices */ 3580 Mat C_CR; 3581 /* additional working stuff */ 3582 PC pc_R; 3583 Mat F,Brhs = NULL; 3584 Vec dummy_vec; 3585 PetscBool isLU,isCHOL,isILU,need_benign_correction,sparserhs; 3586 PetscScalar *coarse_submat_vals; /* TODO: use a PETSc matrix */ 3587 PetscScalar *work; 3588 PetscInt *idx_V_B; 3589 PetscInt lda_rhs,n,n_vertices,n_constraints,*p0_lidx_I; 3590 PetscInt i,n_R,n_D,n_B; 3591 3592 /* some shortcuts to scalars */ 3593 PetscScalar one=1.0,m_one=-1.0; 3594 3595 PetscFunctionBegin; 3596 if (!pcbddc->symmetric_primal && pcbddc->benign_n) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Non-symmetric primal basis computation with benign trick not yet implemented"); 3597 3598 /* Set Non-overlapping dimensions */ 3599 n_vertices = pcbddc->n_vertices; 3600 n_constraints = pcbddc->local_primal_size - pcbddc->benign_n - n_vertices; 3601 n_B = pcis->n_B; 3602 n_D = pcis->n - n_B; 3603 n_R = pcis->n - n_vertices; 3604 3605 /* vertices in boundary numbering */ 3606 ierr = PetscMalloc1(n_vertices,&idx_V_B);CHKERRQ(ierr); 3607 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,n_vertices,pcbddc->local_primal_ref_node,&i,idx_V_B);CHKERRQ(ierr); 3608 if (i != n_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",n_vertices,i); 3609 3610 /* Subdomain contribution (Non-overlapping) to coarse matrix */ 3611 ierr = PetscCalloc1(pcbddc->local_primal_size*pcbddc->local_primal_size,&coarse_submat_vals);CHKERRQ(ierr); 3612 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_vertices,coarse_submat_vals,&S_VV);CHKERRQ(ierr); 3613 ierr = MatSeqDenseSetLDA(S_VV,pcbddc->local_primal_size);CHKERRQ(ierr); 3614 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_vertices,coarse_submat_vals+n_vertices,&S_CV);CHKERRQ(ierr); 3615 ierr = MatSeqDenseSetLDA(S_CV,pcbddc->local_primal_size);CHKERRQ(ierr); 3616 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_vertices,n_constraints,coarse_submat_vals+pcbddc->local_primal_size*n_vertices,&S_VC);CHKERRQ(ierr); 3617 ierr = MatSeqDenseSetLDA(S_VC,pcbddc->local_primal_size);CHKERRQ(ierr); 3618 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_constraints,n_constraints,coarse_submat_vals+(pcbddc->local_primal_size+1)*n_vertices,&S_CC);CHKERRQ(ierr); 3619 ierr = MatSeqDenseSetLDA(S_CC,pcbddc->local_primal_size);CHKERRQ(ierr); 3620 3621 /* determine if can use MatSolve routines instead of calling KSPSolve on ksp_R */ 3622 ierr = KSPGetPC(pcbddc->ksp_R,&pc_R);CHKERRQ(ierr); 3623 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCLU,&isLU);CHKERRQ(ierr); 3624 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCILU,&isILU);CHKERRQ(ierr); 3625 ierr = PetscObjectTypeCompare((PetscObject)pc_R,PCCHOLESKY,&isCHOL);CHKERRQ(ierr); 3626 lda_rhs = n_R; 3627 need_benign_correction = PETSC_FALSE; 3628 if (isLU || isILU || isCHOL) { 3629 ierr = PCFactorGetMatrix(pc_R,&F);CHKERRQ(ierr); 3630 } else if (sub_schurs && sub_schurs->reuse_solver) { 3631 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3632 MatFactorType type; 3633 3634 F = reuse_solver->F; 3635 ierr = MatGetFactorType(F,&type);CHKERRQ(ierr); 3636 if (type == MAT_FACTOR_CHOLESKY) isCHOL = PETSC_TRUE; 3637 ierr = MatGetSize(F,&lda_rhs,NULL);CHKERRQ(ierr); 3638 need_benign_correction = (PetscBool)(!!reuse_solver->benign_n); 3639 } else { 3640 F = NULL; 3641 } 3642 3643 /* determine if we can use a sparse right-hand side */ 3644 sparserhs = PETSC_FALSE; 3645 if (F) { 3646 MatSolverType solver; 3647 3648 ierr = MatFactorGetSolverType(F,&solver);CHKERRQ(ierr); 3649 ierr = PetscStrcmp(solver,MATSOLVERMUMPS,&sparserhs);CHKERRQ(ierr); 3650 } 3651 3652 /* allocate workspace */ 3653 n = 0; 3654 if (n_constraints) { 3655 n += lda_rhs*n_constraints; 3656 } 3657 if (n_vertices) { 3658 n = PetscMax(2*lda_rhs*n_vertices,n); 3659 n = PetscMax((lda_rhs+n_B)*n_vertices,n); 3660 } 3661 if (!pcbddc->symmetric_primal) { 3662 n = PetscMax(2*lda_rhs*pcbddc->local_primal_size,n); 3663 } 3664 ierr = PetscMalloc1(n,&work);CHKERRQ(ierr); 3665 3666 /* create dummy vector to modify rhs and sol of MatMatSolve (work array will never be used) */ 3667 dummy_vec = NULL; 3668 if (need_benign_correction && lda_rhs != n_R && F) { 3669 ierr = VecCreateSeqWithArray(PETSC_COMM_SELF,1,lda_rhs,work,&dummy_vec);CHKERRQ(ierr); 3670 } 3671 3672 /* Precompute stuffs needed for preprocessing and application of BDDC*/ 3673 if (n_constraints) { 3674 Mat M1,M2,M3,C_B; 3675 IS is_aux; 3676 PetscScalar *array,*array2; 3677 3678 ierr = MatDestroy(&pcbddc->local_auxmat1);CHKERRQ(ierr); 3679 ierr = MatDestroy(&pcbddc->local_auxmat2);CHKERRQ(ierr); 3680 3681 /* Extract constraints on R nodes: C_{CR} */ 3682 ierr = ISCreateStride(PETSC_COMM_SELF,n_constraints,n_vertices,1,&is_aux);CHKERRQ(ierr); 3683 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&C_CR);CHKERRQ(ierr); 3684 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_aux,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 3685 3686 /* Assemble local_auxmat2_R = (- A_{RR}^{-1} C^T_{CR}) needed by BDDC setup */ 3687 /* Assemble pcbddc->local_auxmat2 = R_to_B (- A_{RR}^{-1} C^T_{CR}) needed by BDDC application */ 3688 if (!sparserhs) { 3689 ierr = PetscMemzero(work,lda_rhs*n_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 3690 for (i=0;i<n_constraints;i++) { 3691 const PetscScalar *row_cmat_values; 3692 const PetscInt *row_cmat_indices; 3693 PetscInt size_of_constraint,j; 3694 3695 ierr = MatGetRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3696 for (j=0;j<size_of_constraint;j++) { 3697 work[row_cmat_indices[j]+i*lda_rhs] = -row_cmat_values[j]; 3698 } 3699 ierr = MatRestoreRow(C_CR,i,&size_of_constraint,&row_cmat_indices,&row_cmat_values);CHKERRQ(ierr); 3700 } 3701 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&Brhs);CHKERRQ(ierr); 3702 } else { 3703 Mat tC_CR; 3704 3705 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3706 if (lda_rhs != n_R) { 3707 PetscScalar *aa; 3708 PetscInt r,*ii,*jj; 3709 PetscBool done; 3710 3711 ierr = MatGetRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3712 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 3713 ierr = MatSeqAIJGetArray(C_CR,&aa);CHKERRQ(ierr); 3714 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_constraints,lda_rhs,ii,jj,aa,&tC_CR);CHKERRQ(ierr); 3715 ierr = MatRestoreRowIJ(C_CR,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 3716 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 3717 } else { 3718 ierr = PetscObjectReference((PetscObject)C_CR);CHKERRQ(ierr); 3719 tC_CR = C_CR; 3720 } 3721 ierr = MatCreateTranspose(tC_CR,&Brhs);CHKERRQ(ierr); 3722 ierr = MatDestroy(&tC_CR);CHKERRQ(ierr); 3723 } 3724 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,NULL,&local_auxmat2_R);CHKERRQ(ierr); 3725 if (F) { 3726 if (need_benign_correction) { 3727 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3728 3729 /* rhs is already zero on interior dofs, no need to change the rhs */ 3730 ierr = PetscMemzero(reuse_solver->benign_save_vals,pcbddc->benign_n*sizeof(PetscScalar));CHKERRQ(ierr); 3731 } 3732 ierr = MatMatSolve(F,Brhs,local_auxmat2_R);CHKERRQ(ierr); 3733 if (need_benign_correction) { 3734 PetscScalar *marr; 3735 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3736 3737 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3738 if (lda_rhs != n_R) { 3739 for (i=0;i<n_constraints;i++) { 3740 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 3741 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3742 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 3743 } 3744 } else { 3745 for (i=0;i<n_constraints;i++) { 3746 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 3747 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 3748 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3749 } 3750 } 3751 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3752 } 3753 } else { 3754 PetscScalar *marr; 3755 3756 ierr = MatDenseGetArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3757 for (i=0;i<n_constraints;i++) { 3758 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 3759 ierr = VecPlaceArray(pcbddc->vec2_R,marr+i*lda_rhs);CHKERRQ(ierr); 3760 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 3761 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3762 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 3763 } 3764 ierr = MatDenseRestoreArray(local_auxmat2_R,&marr);CHKERRQ(ierr); 3765 } 3766 if (sparserhs) { 3767 ierr = MatScale(C_CR,-1.0);CHKERRQ(ierr); 3768 } 3769 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 3770 if (!pcbddc->switch_static) { 3771 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_constraints,NULL,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3772 ierr = MatDenseGetArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3773 ierr = MatDenseGetArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3774 for (i=0;i<n_constraints;i++) { 3775 ierr = VecPlaceArray(pcbddc->vec1_R,array2+i*lda_rhs);CHKERRQ(ierr); 3776 ierr = VecPlaceArray(pcis->vec1_B,array+i*n_B);CHKERRQ(ierr); 3777 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3778 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 3779 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 3780 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 3781 } 3782 ierr = MatDenseRestoreArray(local_auxmat2_R,&array2);CHKERRQ(ierr); 3783 ierr = MatDenseRestoreArray(pcbddc->local_auxmat2,&array);CHKERRQ(ierr); 3784 ierr = MatMatMult(C_B,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3785 } else { 3786 if (lda_rhs != n_R) { 3787 IS dummy; 3788 3789 ierr = ISCreateStride(PETSC_COMM_SELF,n_R,0,1,&dummy);CHKERRQ(ierr); 3790 ierr = MatCreateSubMatrix(local_auxmat2_R,dummy,NULL,MAT_INITIAL_MATRIX,&pcbddc->local_auxmat2);CHKERRQ(ierr); 3791 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 3792 } else { 3793 ierr = PetscObjectReference((PetscObject)local_auxmat2_R);CHKERRQ(ierr); 3794 pcbddc->local_auxmat2 = local_auxmat2_R; 3795 } 3796 ierr = MatMatMult(C_CR,pcbddc->local_auxmat2,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&M3);CHKERRQ(ierr); 3797 } 3798 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3799 /* Assemble explicitly S_CC = ( C_{CR} A_{RR}^{-1} C^T_{CR} )^{-1} */ 3800 ierr = MatScale(M3,m_one);CHKERRQ(ierr); 3801 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M1);CHKERRQ(ierr); 3802 ierr = MatDuplicate(M3,MAT_DO_NOT_COPY_VALUES,&M2);CHKERRQ(ierr); 3803 if (isCHOL) { 3804 ierr = MatCholeskyFactor(M3,NULL,NULL);CHKERRQ(ierr); 3805 } else { 3806 ierr = MatLUFactor(M3,NULL,NULL,NULL);CHKERRQ(ierr); 3807 } 3808 ierr = VecSet(pcbddc->vec1_C,one);CHKERRQ(ierr); 3809 ierr = MatDiagonalSet(M2,pcbddc->vec1_C,INSERT_VALUES);CHKERRQ(ierr); 3810 ierr = MatMatSolve(M3,M2,M1);CHKERRQ(ierr); 3811 ierr = MatDestroy(&M2);CHKERRQ(ierr); 3812 ierr = MatDestroy(&M3);CHKERRQ(ierr); 3813 /* Assemble local_auxmat1 = S_CC*C_{CB} needed by BDDC application in KSP and in preproc */ 3814 ierr = MatMatMult(M1,C_B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->local_auxmat1);CHKERRQ(ierr); 3815 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 3816 ierr = MatCopy(M1,S_CC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); /* S_CC can have a different LDA, MatMatSolve doesn't support it */ 3817 ierr = MatDestroy(&M1);CHKERRQ(ierr); 3818 } 3819 3820 /* Get submatrices from subdomain matrix */ 3821 if (n_vertices) { 3822 IS is_aux; 3823 PetscBool isseqaij; 3824 3825 if (sub_schurs && sub_schurs->reuse_solver) { /* is_R_local is not sorted, ISComplement doesn't like it */ 3826 IS tis; 3827 3828 ierr = ISDuplicate(pcbddc->is_R_local,&tis);CHKERRQ(ierr); 3829 ierr = ISSort(tis);CHKERRQ(ierr); 3830 ierr = ISComplement(tis,0,pcis->n,&is_aux);CHKERRQ(ierr); 3831 ierr = ISDestroy(&tis);CHKERRQ(ierr); 3832 } else { 3833 ierr = ISComplement(pcbddc->is_R_local,0,pcis->n,&is_aux);CHKERRQ(ierr); 3834 } 3835 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,is_aux,MAT_INITIAL_MATRIX,&A_RV);CHKERRQ(ierr); 3836 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_VR);CHKERRQ(ierr); 3837 ierr = PetscObjectTypeCompare((PetscObject)A_VR,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 3838 if (!isseqaij) { /* MatMatMult(A_VR,A_RRmA_RV) below will raise an error */ 3839 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 3840 } 3841 ierr = MatCreateSubMatrix(pcbddc->local_mat,is_aux,is_aux,MAT_INITIAL_MATRIX,&A_VV);CHKERRQ(ierr); 3842 ierr = ISDestroy(&is_aux);CHKERRQ(ierr); 3843 } 3844 3845 /* Matrix of coarse basis functions (local) */ 3846 if (pcbddc->coarse_phi_B) { 3847 PetscInt on_B,on_primal,on_D=n_D; 3848 if (pcbddc->coarse_phi_D) { 3849 ierr = MatGetSize(pcbddc->coarse_phi_D,&on_D,NULL);CHKERRQ(ierr); 3850 } 3851 ierr = MatGetSize(pcbddc->coarse_phi_B,&on_B,&on_primal);CHKERRQ(ierr); 3852 if (on_B != n_B || on_primal != pcbddc->local_primal_size || on_D != n_D) { 3853 PetscScalar *marray; 3854 3855 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&marray);CHKERRQ(ierr); 3856 ierr = PetscFree(marray);CHKERRQ(ierr); 3857 ierr = MatDestroy(&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3858 ierr = MatDestroy(&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3859 ierr = MatDestroy(&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3860 ierr = MatDestroy(&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3861 } 3862 } 3863 3864 if (!pcbddc->coarse_phi_B) { 3865 PetscScalar *marr; 3866 3867 /* memory size */ 3868 n = n_B*pcbddc->local_primal_size; 3869 if (pcbddc->switch_static || pcbddc->dbg_flag) n += n_D*pcbddc->local_primal_size; 3870 if (!pcbddc->symmetric_primal) n *= 2; 3871 ierr = PetscCalloc1(n,&marr);CHKERRQ(ierr); 3872 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_B);CHKERRQ(ierr); 3873 marr += n_B*pcbddc->local_primal_size; 3874 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3875 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_phi_D);CHKERRQ(ierr); 3876 marr += n_D*pcbddc->local_primal_size; 3877 } 3878 if (!pcbddc->symmetric_primal) { 3879 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_B);CHKERRQ(ierr); 3880 marr += n_B*pcbddc->local_primal_size; 3881 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3882 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_D,pcbddc->local_primal_size,marr,&pcbddc->coarse_psi_D);CHKERRQ(ierr); 3883 } 3884 } else { 3885 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_B);CHKERRQ(ierr); 3886 pcbddc->coarse_psi_B = pcbddc->coarse_phi_B; 3887 if (pcbddc->switch_static || pcbddc->dbg_flag) { 3888 ierr = PetscObjectReference((PetscObject)pcbddc->coarse_phi_D);CHKERRQ(ierr); 3889 pcbddc->coarse_psi_D = pcbddc->coarse_phi_D; 3890 } 3891 } 3892 } 3893 3894 /* We are now ready to evaluate coarse basis functions and subdomain contribution to coarse problem */ 3895 p0_lidx_I = NULL; 3896 if (pcbddc->benign_n && (pcbddc->switch_static || pcbddc->dbg_flag)) { 3897 const PetscInt *idxs; 3898 3899 ierr = ISGetIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3900 ierr = PetscMalloc1(pcbddc->benign_n,&p0_lidx_I);CHKERRQ(ierr); 3901 for (i=0;i<pcbddc->benign_n;i++) { 3902 ierr = PetscFindInt(pcbddc->benign_p0_lidx[i],pcis->n-pcis->n_B,idxs,&p0_lidx_I[i]);CHKERRQ(ierr); 3903 } 3904 ierr = ISRestoreIndices(pcis->is_I_local,&idxs);CHKERRQ(ierr); 3905 } 3906 3907 /* vertices */ 3908 if (n_vertices) { 3909 PetscBool restoreavr = PETSC_FALSE; 3910 3911 ierr = MatConvert(A_VV,MATDENSE,MAT_INPLACE_MATRIX,&A_VV);CHKERRQ(ierr); 3912 3913 if (n_R) { 3914 Mat A_RRmA_RV,A_RV_bcorr=NULL,S_VVt; /* S_VVt with LDA=N */ 3915 PetscBLASInt B_N,B_one = 1; 3916 PetscScalar *x,*y; 3917 3918 ierr = MatScale(A_RV,m_one);CHKERRQ(ierr); 3919 if (need_benign_correction) { 3920 ISLocalToGlobalMapping RtoN; 3921 IS is_p0; 3922 PetscInt *idxs_p0,n; 3923 3924 ierr = PetscMalloc1(pcbddc->benign_n,&idxs_p0);CHKERRQ(ierr); 3925 ierr = ISLocalToGlobalMappingCreateIS(pcbddc->is_R_local,&RtoN);CHKERRQ(ierr); 3926 ierr = ISGlobalToLocalMappingApply(RtoN,IS_GTOLM_DROP,pcbddc->benign_n,pcbddc->benign_p0_lidx,&n,idxs_p0);CHKERRQ(ierr); 3927 if (n != pcbddc->benign_n) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in R numbering for benign p0! %d != %d\n",n,pcbddc->benign_n); 3928 ierr = ISLocalToGlobalMappingDestroy(&RtoN);CHKERRQ(ierr); 3929 ierr = ISCreateGeneral(PETSC_COMM_SELF,n,idxs_p0,PETSC_OWN_POINTER,&is_p0);CHKERRQ(ierr); 3930 ierr = MatCreateSubMatrix(A_RV,is_p0,NULL,MAT_INITIAL_MATRIX,&A_RV_bcorr);CHKERRQ(ierr); 3931 ierr = ISDestroy(&is_p0);CHKERRQ(ierr); 3932 } 3933 3934 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 3935 if (!sparserhs || need_benign_correction) { 3936 if (lda_rhs == n_R) { 3937 ierr = MatConvert(A_RV,MATDENSE,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3938 } else { 3939 PetscScalar *av,*array; 3940 const PetscInt *xadj,*adjncy; 3941 PetscInt n; 3942 PetscBool flg_row; 3943 3944 array = work+lda_rhs*n_vertices; 3945 ierr = PetscMemzero(array,lda_rhs*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 3946 ierr = MatConvert(A_RV,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_RV);CHKERRQ(ierr); 3947 ierr = MatGetRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3948 ierr = MatSeqAIJGetArray(A_RV,&av);CHKERRQ(ierr); 3949 for (i=0;i<n;i++) { 3950 PetscInt j; 3951 for (j=xadj[i];j<xadj[i+1];j++) array[lda_rhs*adjncy[j]+i] = av[j]; 3952 } 3953 ierr = MatRestoreRowIJ(A_RV,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 3954 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 3955 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,array,&A_RV);CHKERRQ(ierr); 3956 } 3957 if (need_benign_correction) { 3958 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 3959 PetscScalar *marr; 3960 3961 ierr = MatDenseGetArray(A_RV,&marr);CHKERRQ(ierr); 3962 /* need \Phi^T A_RV = (I+L)A_RV, L given by 3963 3964 | 0 0 0 | (V) 3965 L = | 0 0 -1 | (P-p0) 3966 | 0 0 -1 | (p0) 3967 3968 */ 3969 for (i=0;i<reuse_solver->benign_n;i++) { 3970 const PetscScalar *vals; 3971 const PetscInt *idxs,*idxs_zero; 3972 PetscInt n,j,nz; 3973 3974 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 3975 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3976 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3977 for (j=0;j<n;j++) { 3978 PetscScalar val = vals[j]; 3979 PetscInt k,col = idxs[j]; 3980 for (k=0;k<nz;k++) marr[idxs_zero[k]+lda_rhs*col] -= val; 3981 } 3982 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 3983 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 3984 } 3985 ierr = MatDenseRestoreArray(A_RV,&marr);CHKERRQ(ierr); 3986 } 3987 ierr = PetscObjectReference((PetscObject)A_RV);CHKERRQ(ierr); 3988 Brhs = A_RV; 3989 } else { 3990 Mat tA_RVT,A_RVT; 3991 3992 if (!pcbddc->symmetric_primal) { 3993 ierr = MatTranspose(A_RV,MAT_INITIAL_MATRIX,&A_RVT);CHKERRQ(ierr); 3994 } else { 3995 restoreavr = PETSC_TRUE; 3996 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 3997 ierr = PetscObjectReference((PetscObject)A_VR);CHKERRQ(ierr); 3998 A_RVT = A_VR; 3999 } 4000 if (lda_rhs != n_R) { 4001 PetscScalar *aa; 4002 PetscInt r,*ii,*jj; 4003 PetscBool done; 4004 4005 ierr = MatGetRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4006 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"GetRowIJ failed"); 4007 ierr = MatSeqAIJGetArray(A_RVT,&aa);CHKERRQ(ierr); 4008 ierr = MatCreateSeqAIJWithArrays(PETSC_COMM_SELF,n_vertices,lda_rhs,ii,jj,aa,&tA_RVT);CHKERRQ(ierr); 4009 ierr = MatRestoreRowIJ(A_RVT,0,PETSC_FALSE,PETSC_FALSE,&r,(const PetscInt**)&ii,(const PetscInt**)&jj,&done);CHKERRQ(ierr); 4010 if (!done) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"RestoreRowIJ failed"); 4011 } else { 4012 ierr = PetscObjectReference((PetscObject)A_RVT);CHKERRQ(ierr); 4013 tA_RVT = A_RVT; 4014 } 4015 ierr = MatCreateTranspose(tA_RVT,&Brhs);CHKERRQ(ierr); 4016 ierr = MatDestroy(&tA_RVT);CHKERRQ(ierr); 4017 ierr = MatDestroy(&A_RVT);CHKERRQ(ierr); 4018 } 4019 if (F) { 4020 /* need to correct the rhs */ 4021 if (need_benign_correction) { 4022 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4023 PetscScalar *marr; 4024 4025 ierr = MatDenseGetArray(Brhs,&marr);CHKERRQ(ierr); 4026 if (lda_rhs != n_R) { 4027 for (i=0;i<n_vertices;i++) { 4028 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4029 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4030 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4031 } 4032 } else { 4033 for (i=0;i<n_vertices;i++) { 4034 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4035 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 4036 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4037 } 4038 } 4039 ierr = MatDenseRestoreArray(Brhs,&marr);CHKERRQ(ierr); 4040 } 4041 ierr = MatMatSolve(F,Brhs,A_RRmA_RV);CHKERRQ(ierr); 4042 if (restoreavr) { 4043 ierr = MatScale(A_VR,-1.0);CHKERRQ(ierr); 4044 } 4045 /* need to correct the solution */ 4046 if (need_benign_correction) { 4047 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4048 PetscScalar *marr; 4049 4050 ierr = MatDenseGetArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4051 if (lda_rhs != n_R) { 4052 for (i=0;i<n_vertices;i++) { 4053 ierr = VecPlaceArray(dummy_vec,marr+i*lda_rhs);CHKERRQ(ierr); 4054 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,dummy_vec,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4055 ierr = VecResetArray(dummy_vec);CHKERRQ(ierr); 4056 } 4057 } else { 4058 for (i=0;i<n_vertices;i++) { 4059 ierr = VecPlaceArray(pcbddc->vec1_R,marr+i*lda_rhs);CHKERRQ(ierr); 4060 ierr = PCBDDCReuseSolversBenignAdapt(reuse_solver,pcbddc->vec1_R,NULL,PETSC_TRUE,PETSC_TRUE);CHKERRQ(ierr); 4061 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4062 } 4063 } 4064 ierr = MatDenseRestoreArray(A_RRmA_RV,&marr);CHKERRQ(ierr); 4065 } 4066 } else { 4067 ierr = MatDenseGetArray(Brhs,&y);CHKERRQ(ierr); 4068 for (i=0;i<n_vertices;i++) { 4069 ierr = VecPlaceArray(pcbddc->vec1_R,y+i*lda_rhs);CHKERRQ(ierr); 4070 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*lda_rhs);CHKERRQ(ierr); 4071 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4072 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4073 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4074 } 4075 ierr = MatDenseRestoreArray(Brhs,&y);CHKERRQ(ierr); 4076 } 4077 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4078 ierr = MatDestroy(&Brhs);CHKERRQ(ierr); 4079 /* S_VV and S_CV */ 4080 if (n_constraints) { 4081 Mat B; 4082 4083 ierr = PetscMemzero(work+lda_rhs*n_vertices,n_B*n_vertices*sizeof(PetscScalar));CHKERRQ(ierr); 4084 for (i=0;i<n_vertices;i++) { 4085 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*lda_rhs);CHKERRQ(ierr); 4086 ierr = VecPlaceArray(pcis->vec1_B,work+lda_rhs*n_vertices+i*n_B);CHKERRQ(ierr); 4087 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4088 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4089 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4090 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4091 } 4092 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_B,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4093 ierr = MatMatMult(pcbddc->local_auxmat1,B,MAT_REUSE_MATRIX,PETSC_DEFAULT,&S_CV);CHKERRQ(ierr); 4094 ierr = MatDestroy(&B);CHKERRQ(ierr); 4095 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_vertices,work+lda_rhs*n_vertices,&B);CHKERRQ(ierr); 4096 ierr = MatMatMult(local_auxmat2_R,S_CV,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4097 ierr = MatScale(S_CV,m_one);CHKERRQ(ierr); 4098 ierr = PetscBLASIntCast(lda_rhs*n_vertices,&B_N);CHKERRQ(ierr); 4099 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,work+lda_rhs*n_vertices,&B_one,work,&B_one)); 4100 ierr = MatDestroy(&B);CHKERRQ(ierr); 4101 } 4102 if (lda_rhs != n_R) { 4103 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4104 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,work,&A_RRmA_RV);CHKERRQ(ierr); 4105 ierr = MatSeqDenseSetLDA(A_RRmA_RV,lda_rhs);CHKERRQ(ierr); 4106 } 4107 ierr = MatMatMult(A_VR,A_RRmA_RV,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VVt);CHKERRQ(ierr); 4108 /* need A_VR * \Phi * A_RRmA_RV = A_VR * (I+L)^T * A_RRmA_RV, L given as before */ 4109 if (need_benign_correction) { 4110 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4111 PetscScalar *marr,*sums; 4112 4113 ierr = PetscMalloc1(n_vertices,&sums);CHKERRQ(ierr); 4114 ierr = MatDenseGetArray(S_VVt,&marr);CHKERRQ(ierr); 4115 for (i=0;i<reuse_solver->benign_n;i++) { 4116 const PetscScalar *vals; 4117 const PetscInt *idxs,*idxs_zero; 4118 PetscInt n,j,nz; 4119 4120 ierr = ISGetLocalSize(reuse_solver->benign_zerodiag_subs[i],&nz);CHKERRQ(ierr); 4121 ierr = ISGetIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4122 for (j=0;j<n_vertices;j++) { 4123 PetscInt k; 4124 sums[j] = 0.; 4125 for (k=0;k<nz;k++) sums[j] += work[idxs_zero[k]+j*lda_rhs]; 4126 } 4127 ierr = MatGetRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4128 for (j=0;j<n;j++) { 4129 PetscScalar val = vals[j]; 4130 PetscInt k; 4131 for (k=0;k<n_vertices;k++) { 4132 marr[idxs[j]+k*n_vertices] += val*sums[k]; 4133 } 4134 } 4135 ierr = MatRestoreRow(A_RV_bcorr,i,&n,&idxs,&vals);CHKERRQ(ierr); 4136 ierr = ISRestoreIndices(reuse_solver->benign_zerodiag_subs[i],&idxs_zero);CHKERRQ(ierr); 4137 } 4138 ierr = PetscFree(sums);CHKERRQ(ierr); 4139 ierr = MatDenseRestoreArray(S_VVt,&marr);CHKERRQ(ierr); 4140 ierr = MatDestroy(&A_RV_bcorr);CHKERRQ(ierr); 4141 } 4142 ierr = MatDestroy(&A_RRmA_RV);CHKERRQ(ierr); 4143 ierr = PetscBLASIntCast(n_vertices*n_vertices,&B_N);CHKERRQ(ierr); 4144 ierr = MatDenseGetArray(A_VV,&x);CHKERRQ(ierr); 4145 ierr = MatDenseGetArray(S_VVt,&y);CHKERRQ(ierr); 4146 PetscStackCallBLAS("BLASaxpy",BLASaxpy_(&B_N,&one,x,&B_one,y,&B_one)); 4147 ierr = MatDenseRestoreArray(A_VV,&x);CHKERRQ(ierr); 4148 ierr = MatDenseRestoreArray(S_VVt,&y);CHKERRQ(ierr); 4149 ierr = MatCopy(S_VVt,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4150 ierr = MatDestroy(&S_VVt);CHKERRQ(ierr); 4151 } else { 4152 ierr = MatCopy(A_VV,S_VV,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4153 } 4154 ierr = MatDestroy(&A_VV);CHKERRQ(ierr); 4155 4156 /* coarse basis functions */ 4157 for (i=0;i<n_vertices;i++) { 4158 PetscScalar *y; 4159 4160 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4161 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4162 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4163 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4164 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4165 y[n_B*i+idx_V_B[i]] = 1.0; 4166 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4167 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4168 4169 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4170 PetscInt j; 4171 4172 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4173 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4174 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4175 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4176 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4177 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4178 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4179 } 4180 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4181 } 4182 /* if n_R == 0 the object is not destroyed */ 4183 ierr = MatDestroy(&A_RV);CHKERRQ(ierr); 4184 } 4185 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 4186 4187 if (n_constraints) { 4188 Mat B; 4189 4190 ierr = MatCreateSeqDense(PETSC_COMM_SELF,lda_rhs,n_constraints,work,&B);CHKERRQ(ierr); 4191 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4192 ierr = MatMatMult(local_auxmat2_R,S_CC,MAT_REUSE_MATRIX,PETSC_DEFAULT,&B);CHKERRQ(ierr); 4193 ierr = MatScale(S_CC,m_one);CHKERRQ(ierr); 4194 if (n_vertices) { 4195 if (isCHOL || need_benign_correction) { /* if we can solve the interior problem with cholesky, we should also be fine with transposing here */ 4196 ierr = MatTranspose(S_CV,MAT_REUSE_MATRIX,&S_VC);CHKERRQ(ierr); 4197 } else { 4198 Mat S_VCt; 4199 4200 if (lda_rhs != n_R) { 4201 ierr = MatDestroy(&B);CHKERRQ(ierr); 4202 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_constraints,work,&B);CHKERRQ(ierr); 4203 ierr = MatSeqDenseSetLDA(B,lda_rhs);CHKERRQ(ierr); 4204 } 4205 ierr = MatMatMult(A_VR,B,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&S_VCt);CHKERRQ(ierr); 4206 ierr = MatCopy(S_VCt,S_VC,SAME_NONZERO_PATTERN);CHKERRQ(ierr); 4207 ierr = MatDestroy(&S_VCt);CHKERRQ(ierr); 4208 } 4209 } 4210 ierr = MatDestroy(&B);CHKERRQ(ierr); 4211 /* coarse basis functions */ 4212 for (i=0;i<n_constraints;i++) { 4213 PetscScalar *y; 4214 4215 ierr = VecPlaceArray(pcbddc->vec1_R,work+lda_rhs*i);CHKERRQ(ierr); 4216 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4217 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*(i+n_vertices));CHKERRQ(ierr); 4218 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4219 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4220 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&y);CHKERRQ(ierr); 4221 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4222 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4223 PetscInt j; 4224 4225 ierr = MatDenseGetArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4226 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*(i+n_vertices));CHKERRQ(ierr); 4227 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4228 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4229 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4230 for (j=0;j<pcbddc->benign_n;j++) y[n_D*i+p0_lidx_I[j]] = 0.0; 4231 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_D,&y);CHKERRQ(ierr); 4232 } 4233 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4234 } 4235 } 4236 if (n_constraints) { 4237 ierr = MatDestroy(&local_auxmat2_R);CHKERRQ(ierr); 4238 } 4239 ierr = PetscFree(p0_lidx_I);CHKERRQ(ierr); 4240 4241 /* coarse matrix entries relative to B_0 */ 4242 if (pcbddc->benign_n) { 4243 Mat B0_B,B0_BPHI; 4244 IS is_dummy; 4245 PetscScalar *data; 4246 PetscInt j; 4247 4248 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4249 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4250 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4251 ierr = MatMatMult(B0_B,pcbddc->coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4252 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4253 ierr = MatDenseGetArray(B0_BPHI,&data);CHKERRQ(ierr); 4254 for (j=0;j<pcbddc->benign_n;j++) { 4255 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4256 for (i=0;i<pcbddc->local_primal_size;i++) { 4257 coarse_submat_vals[primal_idx*pcbddc->local_primal_size+i] = data[i*pcbddc->benign_n+j]; 4258 coarse_submat_vals[i*pcbddc->local_primal_size+primal_idx] = data[i*pcbddc->benign_n+j]; 4259 } 4260 } 4261 ierr = MatDenseRestoreArray(B0_BPHI,&data);CHKERRQ(ierr); 4262 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4263 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4264 } 4265 4266 /* compute other basis functions for non-symmetric problems */ 4267 if (!pcbddc->symmetric_primal) { 4268 Mat B_V=NULL,B_C=NULL; 4269 PetscScalar *marray; 4270 4271 if (n_constraints) { 4272 Mat S_CCT,C_CRT; 4273 4274 ierr = MatTranspose(C_CR,MAT_INITIAL_MATRIX,&C_CRT);CHKERRQ(ierr); 4275 ierr = MatTranspose(S_CC,MAT_INITIAL_MATRIX,&S_CCT);CHKERRQ(ierr); 4276 ierr = MatMatMult(C_CRT,S_CCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_C);CHKERRQ(ierr); 4277 ierr = MatDestroy(&S_CCT);CHKERRQ(ierr); 4278 if (n_vertices) { 4279 Mat S_VCT; 4280 4281 ierr = MatTranspose(S_VC,MAT_INITIAL_MATRIX,&S_VCT);CHKERRQ(ierr); 4282 ierr = MatMatMult(C_CRT,S_VCT,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&B_V);CHKERRQ(ierr); 4283 ierr = MatDestroy(&S_VCT);CHKERRQ(ierr); 4284 } 4285 ierr = MatDestroy(&C_CRT);CHKERRQ(ierr); 4286 } else { 4287 ierr = MatCreateSeqDense(PETSC_COMM_SELF,n_R,n_vertices,NULL,&B_V);CHKERRQ(ierr); 4288 } 4289 if (n_vertices && n_R) { 4290 PetscScalar *av,*marray; 4291 const PetscInt *xadj,*adjncy; 4292 PetscInt n; 4293 PetscBool flg_row; 4294 4295 /* B_V = B_V - A_VR^T */ 4296 ierr = MatConvert(A_VR,MATSEQAIJ,MAT_INPLACE_MATRIX,&A_VR);CHKERRQ(ierr); 4297 ierr = MatGetRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4298 ierr = MatSeqAIJGetArray(A_VR,&av);CHKERRQ(ierr); 4299 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4300 for (i=0;i<n;i++) { 4301 PetscInt j; 4302 for (j=xadj[i];j<xadj[i+1];j++) marray[i*n_R + adjncy[j]] -= av[j]; 4303 } 4304 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4305 ierr = MatRestoreRowIJ(A_VR,0,PETSC_FALSE,PETSC_FALSE,&n,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 4306 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4307 } 4308 4309 /* currently there's no support for MatTransposeMatSolve(F,B,X) */ 4310 if (n_vertices) { 4311 ierr = MatDenseGetArray(B_V,&marray);CHKERRQ(ierr); 4312 for (i=0;i<n_vertices;i++) { 4313 ierr = VecPlaceArray(pcbddc->vec1_R,marray+i*n_R);CHKERRQ(ierr); 4314 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4315 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4316 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4317 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4318 } 4319 ierr = MatDenseRestoreArray(B_V,&marray);CHKERRQ(ierr); 4320 } 4321 if (B_C) { 4322 ierr = MatDenseGetArray(B_C,&marray);CHKERRQ(ierr); 4323 for (i=n_vertices;i<n_constraints+n_vertices;i++) { 4324 ierr = VecPlaceArray(pcbddc->vec1_R,marray+(i-n_vertices)*n_R);CHKERRQ(ierr); 4325 ierr = VecPlaceArray(pcbddc->vec2_R,work+i*n_R);CHKERRQ(ierr); 4326 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 4327 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4328 ierr = VecResetArray(pcbddc->vec2_R);CHKERRQ(ierr); 4329 } 4330 ierr = MatDenseRestoreArray(B_C,&marray);CHKERRQ(ierr); 4331 } 4332 /* coarse basis functions */ 4333 for (i=0;i<pcbddc->local_primal_size;i++) { 4334 PetscScalar *y; 4335 4336 ierr = VecPlaceArray(pcbddc->vec1_R,work+i*n_R);CHKERRQ(ierr); 4337 ierr = MatDenseGetArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4338 ierr = VecPlaceArray(pcis->vec1_B,y+n_B*i);CHKERRQ(ierr); 4339 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4340 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4341 if (i<n_vertices) { 4342 y[n_B*i+idx_V_B[i]] = 1.0; 4343 } 4344 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_B,&y);CHKERRQ(ierr); 4345 ierr = VecResetArray(pcis->vec1_B);CHKERRQ(ierr); 4346 4347 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4348 ierr = MatDenseGetArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4349 ierr = VecPlaceArray(pcis->vec1_D,y+n_D*i);CHKERRQ(ierr); 4350 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4351 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,pcis->vec1_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4352 ierr = VecResetArray(pcis->vec1_D);CHKERRQ(ierr); 4353 ierr = MatDenseRestoreArray(pcbddc->coarse_psi_D,&y);CHKERRQ(ierr); 4354 } 4355 ierr = VecResetArray(pcbddc->vec1_R);CHKERRQ(ierr); 4356 } 4357 ierr = MatDestroy(&B_V);CHKERRQ(ierr); 4358 ierr = MatDestroy(&B_C);CHKERRQ(ierr); 4359 } 4360 4361 /* free memory */ 4362 ierr = PetscFree(idx_V_B);CHKERRQ(ierr); 4363 ierr = MatDestroy(&S_VV);CHKERRQ(ierr); 4364 ierr = MatDestroy(&S_CV);CHKERRQ(ierr); 4365 ierr = MatDestroy(&S_VC);CHKERRQ(ierr); 4366 ierr = MatDestroy(&S_CC);CHKERRQ(ierr); 4367 ierr = PetscFree(work);CHKERRQ(ierr); 4368 if (n_vertices) { 4369 ierr = MatDestroy(&A_VR);CHKERRQ(ierr); 4370 } 4371 if (n_constraints) { 4372 ierr = MatDestroy(&C_CR);CHKERRQ(ierr); 4373 } 4374 /* Checking coarse_sub_mat and coarse basis functios */ 4375 /* Symmetric case : It should be \Phi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4376 /* Non-symmetric case : It should be \Psi^{(j)^T} A^{(j)} \Phi^{(j)}=coarse_sub_mat */ 4377 if (pcbddc->dbg_flag) { 4378 Mat coarse_sub_mat; 4379 Mat AUXMAT,TM1,TM2,TM3,TM4; 4380 Mat coarse_phi_D,coarse_phi_B; 4381 Mat coarse_psi_D,coarse_psi_B; 4382 Mat A_II,A_BB,A_IB,A_BI; 4383 Mat C_B,CPHI; 4384 IS is_dummy; 4385 Vec mones; 4386 MatType checkmattype=MATSEQAIJ; 4387 PetscReal real_value; 4388 4389 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4390 Mat A; 4391 ierr = PCBDDCBenignProject(pc,NULL,NULL,&A);CHKERRQ(ierr); 4392 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4393 ierr = MatCreateSubMatrix(A,pcis->is_I_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4394 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_I_local,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4395 ierr = MatCreateSubMatrix(A,pcis->is_B_local,pcis->is_B_local,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4396 ierr = MatDestroy(&A);CHKERRQ(ierr); 4397 } else { 4398 ierr = MatConvert(pcis->A_II,checkmattype,MAT_INITIAL_MATRIX,&A_II);CHKERRQ(ierr); 4399 ierr = MatConvert(pcis->A_IB,checkmattype,MAT_INITIAL_MATRIX,&A_IB);CHKERRQ(ierr); 4400 ierr = MatConvert(pcis->A_BI,checkmattype,MAT_INITIAL_MATRIX,&A_BI);CHKERRQ(ierr); 4401 ierr = MatConvert(pcis->A_BB,checkmattype,MAT_INITIAL_MATRIX,&A_BB);CHKERRQ(ierr); 4402 } 4403 ierr = MatConvert(pcbddc->coarse_phi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_D);CHKERRQ(ierr); 4404 ierr = MatConvert(pcbddc->coarse_phi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_phi_B);CHKERRQ(ierr); 4405 if (!pcbddc->symmetric_primal) { 4406 ierr = MatConvert(pcbddc->coarse_psi_D,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_D);CHKERRQ(ierr); 4407 ierr = MatConvert(pcbddc->coarse_psi_B,checkmattype,MAT_INITIAL_MATRIX,&coarse_psi_B);CHKERRQ(ierr); 4408 } 4409 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,coarse_submat_vals,&coarse_sub_mat);CHKERRQ(ierr); 4410 4411 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4412 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse sub mat computation (symmetric %d)\n",pcbddc->symmetric_primal);CHKERRQ(ierr); 4413 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4414 if (!pcbddc->symmetric_primal) { 4415 ierr = MatMatMult(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4416 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4417 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4418 ierr = MatMatMult(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4419 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4420 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4421 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4422 ierr = MatTransposeMatMult(coarse_psi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4423 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4424 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4425 ierr = MatTransposeMatMult(coarse_psi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4426 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4427 } else { 4428 ierr = MatPtAP(A_II,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&TM1);CHKERRQ(ierr); 4429 ierr = MatPtAP(A_BB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&TM2);CHKERRQ(ierr); 4430 ierr = MatMatMult(A_IB,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4431 ierr = MatTransposeMatMult(coarse_phi_D,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM3);CHKERRQ(ierr); 4432 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4433 ierr = MatMatMult(A_BI,coarse_phi_D,MAT_INITIAL_MATRIX,1.0,&AUXMAT);CHKERRQ(ierr); 4434 ierr = MatTransposeMatMult(coarse_phi_B,AUXMAT,MAT_INITIAL_MATRIX,1.0,&TM4);CHKERRQ(ierr); 4435 ierr = MatDestroy(&AUXMAT);CHKERRQ(ierr); 4436 } 4437 ierr = MatAXPY(TM1,one,TM2,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4438 ierr = MatAXPY(TM1,one,TM3,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4439 ierr = MatAXPY(TM1,one,TM4,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4440 ierr = MatConvert(TM1,MATSEQDENSE,MAT_INPLACE_MATRIX,&TM1);CHKERRQ(ierr); 4441 if (pcbddc->benign_n) { 4442 Mat B0_B,B0_BPHI; 4443 PetscScalar *data,*data2; 4444 PetscInt j; 4445 4446 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4447 ierr = MatCreateSubMatrix(pcbddc->benign_B0,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 4448 ierr = MatMatMult(B0_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&B0_BPHI);CHKERRQ(ierr); 4449 ierr = MatConvert(B0_BPHI,MATSEQDENSE,MAT_INPLACE_MATRIX,&B0_BPHI);CHKERRQ(ierr); 4450 ierr = MatDenseGetArray(TM1,&data);CHKERRQ(ierr); 4451 ierr = MatDenseGetArray(B0_BPHI,&data2);CHKERRQ(ierr); 4452 for (j=0;j<pcbddc->benign_n;j++) { 4453 PetscInt primal_idx = pcbddc->local_primal_size - pcbddc->benign_n + j; 4454 for (i=0;i<pcbddc->local_primal_size;i++) { 4455 data[primal_idx*pcbddc->local_primal_size+i] += data2[i*pcbddc->benign_n+j]; 4456 data[i*pcbddc->local_primal_size+primal_idx] += data2[i*pcbddc->benign_n+j]; 4457 } 4458 } 4459 ierr = MatDenseRestoreArray(TM1,&data);CHKERRQ(ierr); 4460 ierr = MatDenseRestoreArray(B0_BPHI,&data2);CHKERRQ(ierr); 4461 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 4462 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4463 ierr = MatDestroy(&B0_BPHI);CHKERRQ(ierr); 4464 } 4465 #if 0 4466 { 4467 PetscViewer viewer; 4468 char filename[256]; 4469 sprintf(filename,"details_local_coarse_mat%d_level%d.m",PetscGlobalRank,pcbddc->current_level); 4470 ierr = PetscViewerASCIIOpen(PETSC_COMM_SELF,filename,&viewer);CHKERRQ(ierr); 4471 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 4472 ierr = PetscObjectSetName((PetscObject)coarse_sub_mat,"computed");CHKERRQ(ierr); 4473 ierr = MatView(coarse_sub_mat,viewer);CHKERRQ(ierr); 4474 ierr = PetscObjectSetName((PetscObject)TM1,"projected");CHKERRQ(ierr); 4475 ierr = MatView(TM1,viewer);CHKERRQ(ierr); 4476 if (pcbddc->coarse_phi_B) { 4477 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_B,"phi_B");CHKERRQ(ierr); 4478 ierr = MatView(pcbddc->coarse_phi_B,viewer);CHKERRQ(ierr); 4479 } 4480 if (pcbddc->coarse_phi_D) { 4481 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_phi_D,"phi_D");CHKERRQ(ierr); 4482 ierr = MatView(pcbddc->coarse_phi_D,viewer);CHKERRQ(ierr); 4483 } 4484 if (pcbddc->coarse_psi_B) { 4485 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_B,"psi_B");CHKERRQ(ierr); 4486 ierr = MatView(pcbddc->coarse_psi_B,viewer);CHKERRQ(ierr); 4487 } 4488 if (pcbddc->coarse_psi_D) { 4489 ierr = PetscObjectSetName((PetscObject)pcbddc->coarse_psi_D,"psi_D");CHKERRQ(ierr); 4490 ierr = MatView(pcbddc->coarse_psi_D,viewer);CHKERRQ(ierr); 4491 } 4492 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 4493 } 4494 #endif 4495 ierr = MatAXPY(TM1,m_one,coarse_sub_mat,DIFFERENT_NONZERO_PATTERN);CHKERRQ(ierr); 4496 ierr = MatNorm(TM1,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4497 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4498 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d matrix error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4499 4500 /* check constraints */ 4501 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->local_primal_size-pcbddc->benign_n,0,1,&is_dummy);CHKERRQ(ierr); 4502 ierr = MatCreateSubMatrix(pcbddc->ConstraintMatrix,is_dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&C_B);CHKERRQ(ierr); 4503 if (!pcbddc->benign_n) { /* TODO: add benign case */ 4504 ierr = MatMatMult(C_B,coarse_phi_B,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4505 } else { 4506 PetscScalar *data; 4507 Mat tmat; 4508 ierr = MatDenseGetArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4509 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcis->n_B,pcbddc->local_primal_size-pcbddc->benign_n,data,&tmat);CHKERRQ(ierr); 4510 ierr = MatDenseRestoreArray(pcbddc->coarse_phi_B,&data);CHKERRQ(ierr); 4511 ierr = MatMatMult(C_B,tmat,MAT_INITIAL_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4512 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 4513 } 4514 ierr = MatCreateVecs(CPHI,&mones,NULL);CHKERRQ(ierr); 4515 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4516 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4517 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4518 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d phi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4519 if (!pcbddc->symmetric_primal) { 4520 ierr = MatMatMult(C_B,coarse_psi_B,MAT_REUSE_MATRIX,1.0,&CPHI);CHKERRQ(ierr); 4521 ierr = VecSet(mones,-1.0);CHKERRQ(ierr); 4522 ierr = MatDiagonalSet(CPHI,mones,ADD_VALUES);CHKERRQ(ierr); 4523 ierr = MatNorm(CPHI,NORM_FROBENIUS,&real_value);CHKERRQ(ierr); 4524 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d psi constraints error % 1.14e\n",PetscGlobalRank,real_value);CHKERRQ(ierr); 4525 } 4526 ierr = MatDestroy(&C_B);CHKERRQ(ierr); 4527 ierr = MatDestroy(&CPHI);CHKERRQ(ierr); 4528 ierr = ISDestroy(&is_dummy);CHKERRQ(ierr); 4529 ierr = VecDestroy(&mones);CHKERRQ(ierr); 4530 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4531 ierr = MatDestroy(&A_II);CHKERRQ(ierr); 4532 ierr = MatDestroy(&A_BB);CHKERRQ(ierr); 4533 ierr = MatDestroy(&A_IB);CHKERRQ(ierr); 4534 ierr = MatDestroy(&A_BI);CHKERRQ(ierr); 4535 ierr = MatDestroy(&TM1);CHKERRQ(ierr); 4536 ierr = MatDestroy(&TM2);CHKERRQ(ierr); 4537 ierr = MatDestroy(&TM3);CHKERRQ(ierr); 4538 ierr = MatDestroy(&TM4);CHKERRQ(ierr); 4539 ierr = MatDestroy(&coarse_phi_D);CHKERRQ(ierr); 4540 ierr = MatDestroy(&coarse_phi_B);CHKERRQ(ierr); 4541 if (!pcbddc->symmetric_primal) { 4542 ierr = MatDestroy(&coarse_psi_D);CHKERRQ(ierr); 4543 ierr = MatDestroy(&coarse_psi_B);CHKERRQ(ierr); 4544 } 4545 ierr = MatDestroy(&coarse_sub_mat);CHKERRQ(ierr); 4546 } 4547 /* get back data */ 4548 *coarse_submat_vals_n = coarse_submat_vals; 4549 PetscFunctionReturn(0); 4550 } 4551 4552 PetscErrorCode MatCreateSubMatrixUnsorted(Mat A, IS isrow, IS iscol, Mat* B) 4553 { 4554 Mat *work_mat; 4555 IS isrow_s,iscol_s; 4556 PetscBool rsorted,csorted; 4557 PetscInt rsize,*idxs_perm_r=NULL,csize,*idxs_perm_c=NULL; 4558 PetscErrorCode ierr; 4559 4560 PetscFunctionBegin; 4561 ierr = ISSorted(isrow,&rsorted);CHKERRQ(ierr); 4562 ierr = ISSorted(iscol,&csorted);CHKERRQ(ierr); 4563 ierr = ISGetLocalSize(isrow,&rsize);CHKERRQ(ierr); 4564 ierr = ISGetLocalSize(iscol,&csize);CHKERRQ(ierr); 4565 4566 if (!rsorted) { 4567 const PetscInt *idxs; 4568 PetscInt *idxs_sorted,i; 4569 4570 ierr = PetscMalloc1(rsize,&idxs_perm_r);CHKERRQ(ierr); 4571 ierr = PetscMalloc1(rsize,&idxs_sorted);CHKERRQ(ierr); 4572 for (i=0;i<rsize;i++) { 4573 idxs_perm_r[i] = i; 4574 } 4575 ierr = ISGetIndices(isrow,&idxs);CHKERRQ(ierr); 4576 ierr = PetscSortIntWithPermutation(rsize,idxs,idxs_perm_r);CHKERRQ(ierr); 4577 for (i=0;i<rsize;i++) { 4578 idxs_sorted[i] = idxs[idxs_perm_r[i]]; 4579 } 4580 ierr = ISRestoreIndices(isrow,&idxs);CHKERRQ(ierr); 4581 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_sorted,PETSC_OWN_POINTER,&isrow_s);CHKERRQ(ierr); 4582 } else { 4583 ierr = PetscObjectReference((PetscObject)isrow);CHKERRQ(ierr); 4584 isrow_s = isrow; 4585 } 4586 4587 if (!csorted) { 4588 if (isrow == iscol) { 4589 ierr = PetscObjectReference((PetscObject)isrow_s);CHKERRQ(ierr); 4590 iscol_s = isrow_s; 4591 } else { 4592 const PetscInt *idxs; 4593 PetscInt *idxs_sorted,i; 4594 4595 ierr = PetscMalloc1(csize,&idxs_perm_c);CHKERRQ(ierr); 4596 ierr = PetscMalloc1(csize,&idxs_sorted);CHKERRQ(ierr); 4597 for (i=0;i<csize;i++) { 4598 idxs_perm_c[i] = i; 4599 } 4600 ierr = ISGetIndices(iscol,&idxs);CHKERRQ(ierr); 4601 ierr = PetscSortIntWithPermutation(csize,idxs,idxs_perm_c);CHKERRQ(ierr); 4602 for (i=0;i<csize;i++) { 4603 idxs_sorted[i] = idxs[idxs_perm_c[i]]; 4604 } 4605 ierr = ISRestoreIndices(iscol,&idxs);CHKERRQ(ierr); 4606 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_sorted,PETSC_OWN_POINTER,&iscol_s);CHKERRQ(ierr); 4607 } 4608 } else { 4609 ierr = PetscObjectReference((PetscObject)iscol);CHKERRQ(ierr); 4610 iscol_s = iscol; 4611 } 4612 4613 ierr = MatCreateSubMatrices(A,1,&isrow_s,&iscol_s,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4614 4615 if (!rsorted || !csorted) { 4616 Mat new_mat; 4617 IS is_perm_r,is_perm_c; 4618 4619 if (!rsorted) { 4620 PetscInt *idxs_r,i; 4621 ierr = PetscMalloc1(rsize,&idxs_r);CHKERRQ(ierr); 4622 for (i=0;i<rsize;i++) { 4623 idxs_r[idxs_perm_r[i]] = i; 4624 } 4625 ierr = PetscFree(idxs_perm_r);CHKERRQ(ierr); 4626 ierr = ISCreateGeneral(PETSC_COMM_SELF,rsize,idxs_r,PETSC_OWN_POINTER,&is_perm_r);CHKERRQ(ierr); 4627 } else { 4628 ierr = ISCreateStride(PETSC_COMM_SELF,rsize,0,1,&is_perm_r);CHKERRQ(ierr); 4629 } 4630 ierr = ISSetPermutation(is_perm_r);CHKERRQ(ierr); 4631 4632 if (!csorted) { 4633 if (isrow_s == iscol_s) { 4634 ierr = PetscObjectReference((PetscObject)is_perm_r);CHKERRQ(ierr); 4635 is_perm_c = is_perm_r; 4636 } else { 4637 PetscInt *idxs_c,i; 4638 if (!idxs_perm_c) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Permutation array not present"); 4639 ierr = PetscMalloc1(csize,&idxs_c);CHKERRQ(ierr); 4640 for (i=0;i<csize;i++) { 4641 idxs_c[idxs_perm_c[i]] = i; 4642 } 4643 ierr = PetscFree(idxs_perm_c);CHKERRQ(ierr); 4644 ierr = ISCreateGeneral(PETSC_COMM_SELF,csize,idxs_c,PETSC_OWN_POINTER,&is_perm_c);CHKERRQ(ierr); 4645 } 4646 } else { 4647 ierr = ISCreateStride(PETSC_COMM_SELF,csize,0,1,&is_perm_c);CHKERRQ(ierr); 4648 } 4649 ierr = ISSetPermutation(is_perm_c);CHKERRQ(ierr); 4650 4651 ierr = MatPermute(work_mat[0],is_perm_r,is_perm_c,&new_mat);CHKERRQ(ierr); 4652 ierr = MatDestroy(&work_mat[0]);CHKERRQ(ierr); 4653 work_mat[0] = new_mat; 4654 ierr = ISDestroy(&is_perm_r);CHKERRQ(ierr); 4655 ierr = ISDestroy(&is_perm_c);CHKERRQ(ierr); 4656 } 4657 4658 ierr = PetscObjectReference((PetscObject)work_mat[0]);CHKERRQ(ierr); 4659 *B = work_mat[0]; 4660 ierr = MatDestroyMatrices(1,&work_mat);CHKERRQ(ierr); 4661 ierr = ISDestroy(&isrow_s);CHKERRQ(ierr); 4662 ierr = ISDestroy(&iscol_s);CHKERRQ(ierr); 4663 PetscFunctionReturn(0); 4664 } 4665 4666 PetscErrorCode PCBDDCComputeLocalMatrix(PC pc, Mat ChangeOfBasisMatrix) 4667 { 4668 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 4669 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4670 Mat new_mat,lA; 4671 IS is_local,is_global; 4672 PetscInt local_size; 4673 PetscBool isseqaij; 4674 PetscErrorCode ierr; 4675 4676 PetscFunctionBegin; 4677 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4678 ierr = MatGetSize(matis->A,&local_size,NULL);CHKERRQ(ierr); 4679 ierr = ISCreateStride(PetscObjectComm((PetscObject)matis->A),local_size,0,1,&is_local);CHKERRQ(ierr); 4680 ierr = ISLocalToGlobalMappingApplyIS(pc->pmat->rmap->mapping,is_local,&is_global);CHKERRQ(ierr); 4681 ierr = ISDestroy(&is_local);CHKERRQ(ierr); 4682 ierr = MatCreateSubMatrixUnsorted(ChangeOfBasisMatrix,is_global,is_global,&new_mat);CHKERRQ(ierr); 4683 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 4684 4685 /* check */ 4686 if (pcbddc->dbg_flag) { 4687 Vec x,x_change; 4688 PetscReal error; 4689 4690 ierr = MatCreateVecs(ChangeOfBasisMatrix,&x,&x_change);CHKERRQ(ierr); 4691 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 4692 ierr = MatMult(ChangeOfBasisMatrix,x,x_change);CHKERRQ(ierr); 4693 ierr = VecScatterBegin(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4694 ierr = VecScatterEnd(matis->cctx,x,matis->x,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 4695 ierr = MatMult(new_mat,matis->x,matis->y);CHKERRQ(ierr); 4696 if (!pcbddc->change_interior) { 4697 const PetscScalar *x,*y,*v; 4698 PetscReal lerror = 0.; 4699 PetscInt i; 4700 4701 ierr = VecGetArrayRead(matis->x,&x);CHKERRQ(ierr); 4702 ierr = VecGetArrayRead(matis->y,&y);CHKERRQ(ierr); 4703 ierr = VecGetArrayRead(matis->counter,&v);CHKERRQ(ierr); 4704 for (i=0;i<local_size;i++) 4705 if (PetscRealPart(v[i]) < 1.5 && PetscAbsScalar(x[i]-y[i]) > lerror) 4706 lerror = PetscAbsScalar(x[i]-y[i]); 4707 ierr = VecRestoreArrayRead(matis->x,&x);CHKERRQ(ierr); 4708 ierr = VecRestoreArrayRead(matis->y,&y);CHKERRQ(ierr); 4709 ierr = VecRestoreArrayRead(matis->counter,&v);CHKERRQ(ierr); 4710 ierr = MPIU_Allreduce(&lerror,&error,1,MPIU_REAL,MPI_MAX,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 4711 if (error > PETSC_SMALL) { 4712 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4713 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on I: %1.6e\n",error); 4714 } else { 4715 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on I: %1.6e\n",error); 4716 } 4717 } 4718 } 4719 ierr = VecScatterBegin(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4720 ierr = VecScatterEnd(matis->rctx,matis->y,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 4721 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 4722 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 4723 if (error > PETSC_SMALL) { 4724 if (!pcbddc->user_ChangeOfBasisMatrix || pcbddc->current_level) { 4725 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 4726 } else { 4727 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_USER,"Error global vs local change on N: %1.6e\n",error); 4728 } 4729 } 4730 ierr = VecDestroy(&x);CHKERRQ(ierr); 4731 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 4732 } 4733 4734 /* lA is present if we are setting up an inner BDDC for a saddle point FETI-DP */ 4735 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject*)&lA);CHKERRQ(ierr); 4736 4737 /* TODO: HOW TO WORK WITH BAIJ and SBAIJ and SEQDENSE? */ 4738 ierr = PetscObjectTypeCompare((PetscObject)matis->A,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 4739 if (isseqaij) { 4740 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4741 ierr = MatPtAP(matis->A,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4742 if (lA) { 4743 Mat work; 4744 ierr = MatPtAP(lA,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4745 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4746 ierr = MatDestroy(&work);CHKERRQ(ierr); 4747 } 4748 } else { 4749 Mat work_mat; 4750 4751 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 4752 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4753 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&pcbddc->local_mat);CHKERRQ(ierr); 4754 ierr = MatDestroy(&work_mat);CHKERRQ(ierr); 4755 if (lA) { 4756 Mat work; 4757 ierr = MatConvert(lA,MATSEQAIJ,MAT_INITIAL_MATRIX,&work_mat);CHKERRQ(ierr); 4758 ierr = MatPtAP(work_mat,new_mat,MAT_INITIAL_MATRIX,2.0,&work);CHKERRQ(ierr); 4759 ierr = PetscObjectCompose((PetscObject)pc,"__KSPFETIDP_lA" ,(PetscObject)work);CHKERRQ(ierr); 4760 ierr = MatDestroy(&work);CHKERRQ(ierr); 4761 } 4762 } 4763 if (matis->A->symmetric_set) { 4764 ierr = MatSetOption(pcbddc->local_mat,MAT_SYMMETRIC,matis->A->symmetric);CHKERRQ(ierr); 4765 #if !defined(PETSC_USE_COMPLEX) 4766 ierr = MatSetOption(pcbddc->local_mat,MAT_HERMITIAN,matis->A->symmetric);CHKERRQ(ierr); 4767 #endif 4768 } 4769 ierr = MatDestroy(&new_mat);CHKERRQ(ierr); 4770 PetscFunctionReturn(0); 4771 } 4772 4773 PetscErrorCode PCBDDCSetUpLocalScatters(PC pc) 4774 { 4775 PC_IS* pcis = (PC_IS*)(pc->data); 4776 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 4777 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4778 PetscInt *idx_R_local=NULL; 4779 PetscInt n_vertices,i,j,n_R,n_D,n_B; 4780 PetscInt vbs,bs; 4781 PetscBT bitmask=NULL; 4782 PetscErrorCode ierr; 4783 4784 PetscFunctionBegin; 4785 /* 4786 No need to setup local scatters if 4787 - primal space is unchanged 4788 AND 4789 - we actually have locally some primal dofs (could not be true in multilevel or for isolated subdomains) 4790 AND 4791 - we are not in debugging mode (this is needed since there are Synchronized prints at the end of the subroutine 4792 */ 4793 if (!pcbddc->new_primal_space_local && pcbddc->local_primal_size && !pcbddc->dbg_flag) { 4794 PetscFunctionReturn(0); 4795 } 4796 /* destroy old objects */ 4797 ierr = ISDestroy(&pcbddc->is_R_local);CHKERRQ(ierr); 4798 ierr = VecScatterDestroy(&pcbddc->R_to_B);CHKERRQ(ierr); 4799 ierr = VecScatterDestroy(&pcbddc->R_to_D);CHKERRQ(ierr); 4800 /* Set Non-overlapping dimensions */ 4801 n_B = pcis->n_B; 4802 n_D = pcis->n - n_B; 4803 n_vertices = pcbddc->n_vertices; 4804 4805 /* Dohrmann's notation: dofs splitted in R (Remaining: all dofs but the vertices) and V (Vertices) */ 4806 4807 /* create auxiliary bitmask and allocate workspace */ 4808 if (!sub_schurs || !sub_schurs->reuse_solver) { 4809 ierr = PetscMalloc1(pcis->n-n_vertices,&idx_R_local);CHKERRQ(ierr); 4810 ierr = PetscBTCreate(pcis->n,&bitmask);CHKERRQ(ierr); 4811 for (i=0;i<n_vertices;i++) { 4812 ierr = PetscBTSet(bitmask,pcbddc->local_primal_ref_node[i]);CHKERRQ(ierr); 4813 } 4814 4815 for (i=0, n_R=0; i<pcis->n; i++) { 4816 if (!PetscBTLookup(bitmask,i)) { 4817 idx_R_local[n_R++] = i; 4818 } 4819 } 4820 } else { /* A different ordering (already computed) is present if we are reusing the Schur solver */ 4821 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4822 4823 ierr = ISGetIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4824 ierr = ISGetLocalSize(reuse_solver->is_R,&n_R);CHKERRQ(ierr); 4825 } 4826 4827 /* Block code */ 4828 vbs = 1; 4829 ierr = MatGetBlockSize(pcbddc->local_mat,&bs);CHKERRQ(ierr); 4830 if (bs>1 && !(n_vertices%bs)) { 4831 PetscBool is_blocked = PETSC_TRUE; 4832 PetscInt *vary; 4833 if (!sub_schurs || !sub_schurs->reuse_solver) { 4834 ierr = PetscMalloc1(pcis->n/bs,&vary);CHKERRQ(ierr); 4835 ierr = PetscMemzero(vary,pcis->n/bs*sizeof(PetscInt));CHKERRQ(ierr); 4836 /* Verify that the vertex indices correspond to each element in a block (code taken from sbaij2.c) */ 4837 /* it is ok to check this way since local_primal_ref_node are always sorted by local numbering and idx_R_local is obtained as a complement */ 4838 for (i=0; i<n_vertices; i++) vary[pcbddc->local_primal_ref_node[i]/bs]++; 4839 for (i=0; i<pcis->n/bs; i++) { 4840 if (vary[i]!=0 && vary[i]!=bs) { 4841 is_blocked = PETSC_FALSE; 4842 break; 4843 } 4844 } 4845 ierr = PetscFree(vary);CHKERRQ(ierr); 4846 } else { 4847 /* Verify directly the R set */ 4848 for (i=0; i<n_R/bs; i++) { 4849 PetscInt j,node=idx_R_local[bs*i]; 4850 for (j=1; j<bs; j++) { 4851 if (node != idx_R_local[bs*i+j]-j) { 4852 is_blocked = PETSC_FALSE; 4853 break; 4854 } 4855 } 4856 } 4857 } 4858 if (is_blocked) { /* build compressed IS for R nodes (complement of vertices) */ 4859 vbs = bs; 4860 for (i=0;i<n_R/vbs;i++) { 4861 idx_R_local[i] = idx_R_local[vbs*i]/vbs; 4862 } 4863 } 4864 } 4865 ierr = ISCreateBlock(PETSC_COMM_SELF,vbs,n_R/vbs,idx_R_local,PETSC_COPY_VALUES,&pcbddc->is_R_local);CHKERRQ(ierr); 4866 if (sub_schurs && sub_schurs->reuse_solver) { 4867 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4868 4869 ierr = ISRestoreIndices(reuse_solver->is_R,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4870 ierr = ISDestroy(&reuse_solver->is_R);CHKERRQ(ierr); 4871 ierr = PetscObjectReference((PetscObject)pcbddc->is_R_local);CHKERRQ(ierr); 4872 reuse_solver->is_R = pcbddc->is_R_local; 4873 } else { 4874 ierr = PetscFree(idx_R_local);CHKERRQ(ierr); 4875 } 4876 4877 /* print some info if requested */ 4878 if (pcbddc->dbg_flag) { 4879 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 4880 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4881 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 4882 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d local dimensions\n",PetscGlobalRank);CHKERRQ(ierr); 4883 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_size = %d, dirichlet_size = %d, boundary_size = %d\n",pcis->n,n_D,n_B);CHKERRQ(ierr); 4884 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"r_size = %d, v_size = %d, constraints = %d, local_primal_size = %d\n",n_R,n_vertices,pcbddc->local_primal_size-n_vertices-pcbddc->benign_n,pcbddc->local_primal_size);CHKERRQ(ierr); 4885 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 4886 } 4887 4888 /* VecScatters pcbddc->R_to_B and (optionally) pcbddc->R_to_D */ 4889 if (!sub_schurs || !sub_schurs->reuse_solver) { 4890 IS is_aux1,is_aux2; 4891 PetscInt *aux_array1,*aux_array2,*is_indices,*idx_R_local; 4892 4893 ierr = ISGetIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4894 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array1);CHKERRQ(ierr); 4895 ierr = PetscMalloc1(pcis->n_B-n_vertices,&aux_array2);CHKERRQ(ierr); 4896 ierr = ISGetIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4897 for (i=0; i<n_D; i++) { 4898 ierr = PetscBTSet(bitmask,is_indices[i]);CHKERRQ(ierr); 4899 } 4900 ierr = ISRestoreIndices(pcis->is_I_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4901 for (i=0, j=0; i<n_R; i++) { 4902 if (!PetscBTLookup(bitmask,idx_R_local[i])) { 4903 aux_array1[j++] = i; 4904 } 4905 } 4906 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4907 ierr = ISGetIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4908 for (i=0, j=0; i<n_B; i++) { 4909 if (!PetscBTLookup(bitmask,is_indices[i])) { 4910 aux_array2[j++] = i; 4911 } 4912 } 4913 ierr = ISRestoreIndices(pcis->is_B_local,(const PetscInt**)&is_indices);CHKERRQ(ierr); 4914 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array2,PETSC_OWN_POINTER,&is_aux2);CHKERRQ(ierr); 4915 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_B,is_aux2,&pcbddc->R_to_B);CHKERRQ(ierr); 4916 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4917 ierr = ISDestroy(&is_aux2);CHKERRQ(ierr); 4918 4919 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4920 ierr = PetscMalloc1(n_D,&aux_array1);CHKERRQ(ierr); 4921 for (i=0, j=0; i<n_R; i++) { 4922 if (PetscBTLookup(bitmask,idx_R_local[i])) { 4923 aux_array1[j++] = i; 4924 } 4925 } 4926 ierr = ISCreateGeneral(PETSC_COMM_SELF,j,aux_array1,PETSC_OWN_POINTER,&is_aux1);CHKERRQ(ierr); 4927 ierr = VecScatterCreate(pcbddc->vec1_R,is_aux1,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4928 ierr = ISDestroy(&is_aux1);CHKERRQ(ierr); 4929 } 4930 ierr = PetscBTDestroy(&bitmask);CHKERRQ(ierr); 4931 ierr = ISRestoreIndices(pcbddc->is_R_local,(const PetscInt**)&idx_R_local);CHKERRQ(ierr); 4932 } else { 4933 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 4934 IS tis; 4935 PetscInt schur_size; 4936 4937 ierr = ISGetLocalSize(reuse_solver->is_B,&schur_size);CHKERRQ(ierr); 4938 ierr = ISCreateStride(PETSC_COMM_SELF,schur_size,n_D,1,&tis);CHKERRQ(ierr); 4939 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_B,reuse_solver->is_B,&pcbddc->R_to_B);CHKERRQ(ierr); 4940 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4941 if (pcbddc->switch_static || pcbddc->dbg_flag) { 4942 ierr = ISCreateStride(PETSC_COMM_SELF,n_D,0,1,&tis);CHKERRQ(ierr); 4943 ierr = VecScatterCreate(pcbddc->vec1_R,tis,pcis->vec1_D,(IS)0,&pcbddc->R_to_D);CHKERRQ(ierr); 4944 ierr = ISDestroy(&tis);CHKERRQ(ierr); 4945 } 4946 } 4947 PetscFunctionReturn(0); 4948 } 4949 4950 4951 PetscErrorCode PCBDDCSetUpLocalSolvers(PC pc, PetscBool dirichlet, PetscBool neumann) 4952 { 4953 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 4954 PC_IS *pcis = (PC_IS*)pc->data; 4955 PC pc_temp; 4956 Mat A_RR; 4957 MatReuse reuse; 4958 PetscScalar m_one = -1.0; 4959 PetscReal value; 4960 PetscInt n_D,n_R; 4961 PetscBool check_corr,issbaij; 4962 PetscErrorCode ierr; 4963 /* prefixes stuff */ 4964 char dir_prefix[256],neu_prefix[256],str_level[16]; 4965 size_t len; 4966 4967 PetscFunctionBegin; 4968 4969 /* compute prefixes */ 4970 ierr = PetscStrcpy(dir_prefix,"");CHKERRQ(ierr); 4971 ierr = PetscStrcpy(neu_prefix,"");CHKERRQ(ierr); 4972 if (!pcbddc->current_level) { 4973 ierr = PetscStrcpy(dir_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4974 ierr = PetscStrcpy(neu_prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 4975 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4976 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4977 } else { 4978 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 4979 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 4980 len -= 15; /* remove "pc_bddc_coarse_" */ 4981 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 4982 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 4983 ierr = PetscStrncpy(dir_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4984 ierr = PetscStrncpy(neu_prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 4985 ierr = PetscStrcat(dir_prefix,"pc_bddc_dirichlet_");CHKERRQ(ierr); 4986 ierr = PetscStrcat(neu_prefix,"pc_bddc_neumann_");CHKERRQ(ierr); 4987 ierr = PetscStrcat(dir_prefix,str_level);CHKERRQ(ierr); 4988 ierr = PetscStrcat(neu_prefix,str_level);CHKERRQ(ierr); 4989 } 4990 4991 /* DIRICHLET PROBLEM */ 4992 if (dirichlet) { 4993 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 4994 if (pcbddc->benign_n && !pcbddc->benign_change_explicit) { 4995 if (!sub_schurs || !sub_schurs->reuse_solver) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Not yet implemented\n"); 4996 if (pcbddc->dbg_flag) { 4997 Mat A_IIn; 4998 4999 ierr = PCBDDCBenignProject(pc,pcis->is_I_local,pcis->is_I_local,&A_IIn);CHKERRQ(ierr); 5000 ierr = MatDestroy(&pcis->A_II);CHKERRQ(ierr); 5001 pcis->A_II = A_IIn; 5002 } 5003 } 5004 if (pcbddc->local_mat->symmetric_set) { 5005 ierr = MatSetOption(pcis->A_II,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5006 } 5007 /* Matrix for Dirichlet problem is pcis->A_II */ 5008 n_D = pcis->n - pcis->n_B; 5009 if (!pcbddc->ksp_D) { /* create object if not yet build */ 5010 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_D);CHKERRQ(ierr); 5011 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_D,(PetscObject)pc,1);CHKERRQ(ierr); 5012 /* default */ 5013 ierr = KSPSetType(pcbddc->ksp_D,KSPPREONLY);CHKERRQ(ierr); 5014 ierr = KSPSetOptionsPrefix(pcbddc->ksp_D,dir_prefix);CHKERRQ(ierr); 5015 ierr = PetscObjectTypeCompare((PetscObject)pcis->A_II,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5016 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5017 if (issbaij) { 5018 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5019 } else { 5020 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5021 } 5022 /* Allow user's customization */ 5023 ierr = KSPSetFromOptions(pcbddc->ksp_D);CHKERRQ(ierr); 5024 } 5025 ierr = KSPSetOperators(pcbddc->ksp_D,pcis->A_II,pcis->A_II);CHKERRQ(ierr); 5026 if (sub_schurs && sub_schurs->reuse_solver) { 5027 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5028 5029 ierr = KSPSetPC(pcbddc->ksp_D,reuse_solver->interior_solver);CHKERRQ(ierr); 5030 } 5031 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5032 if (!n_D) { 5033 ierr = KSPGetPC(pcbddc->ksp_D,&pc_temp);CHKERRQ(ierr); 5034 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5035 } 5036 /* Set Up KSP for Dirichlet problem of BDDC */ 5037 ierr = KSPSetUp(pcbddc->ksp_D);CHKERRQ(ierr); 5038 /* set ksp_D into pcis data */ 5039 ierr = KSPDestroy(&pcis->ksp_D);CHKERRQ(ierr); 5040 ierr = PetscObjectReference((PetscObject)pcbddc->ksp_D);CHKERRQ(ierr); 5041 pcis->ksp_D = pcbddc->ksp_D; 5042 } 5043 5044 /* NEUMANN PROBLEM */ 5045 A_RR = 0; 5046 if (neumann) { 5047 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5048 PetscInt ibs,mbs; 5049 PetscBool issbaij, reuse_neumann_solver; 5050 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5051 5052 reuse_neumann_solver = PETSC_FALSE; 5053 if (sub_schurs && sub_schurs->reuse_solver) { 5054 IS iP; 5055 5056 reuse_neumann_solver = PETSC_TRUE; 5057 ierr = PetscObjectQuery((PetscObject)sub_schurs->A,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 5058 if (iP) reuse_neumann_solver = PETSC_FALSE; 5059 } 5060 /* Matrix for Neumann problem is A_RR -> we need to create/reuse it at this point */ 5061 ierr = ISGetSize(pcbddc->is_R_local,&n_R);CHKERRQ(ierr); 5062 if (pcbddc->ksp_R) { /* already created ksp */ 5063 PetscInt nn_R; 5064 ierr = KSPGetOperators(pcbddc->ksp_R,NULL,&A_RR);CHKERRQ(ierr); 5065 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5066 ierr = MatGetSize(A_RR,&nn_R,NULL);CHKERRQ(ierr); 5067 if (nn_R != n_R) { /* old ksp is not reusable, so reset it */ 5068 ierr = KSPReset(pcbddc->ksp_R);CHKERRQ(ierr); 5069 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5070 reuse = MAT_INITIAL_MATRIX; 5071 } else { /* same sizes, but nonzero pattern depend on primal vertices so it can be changed */ 5072 if (pcbddc->new_primal_space_local) { /* we are not sure the matrix will have the same nonzero pattern */ 5073 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5074 reuse = MAT_INITIAL_MATRIX; 5075 } else { /* safe to reuse the matrix */ 5076 reuse = MAT_REUSE_MATRIX; 5077 } 5078 } 5079 /* last check */ 5080 if (pc->flag == DIFFERENT_NONZERO_PATTERN) { 5081 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5082 reuse = MAT_INITIAL_MATRIX; 5083 } 5084 } else { /* first time, so we need to create the matrix */ 5085 reuse = MAT_INITIAL_MATRIX; 5086 } 5087 /* convert pcbddc->local_mat if needed later in PCBDDCSetUpCorrection */ 5088 ierr = MatGetBlockSize(pcbddc->local_mat,&mbs);CHKERRQ(ierr); 5089 ierr = ISGetBlockSize(pcbddc->is_R_local,&ibs);CHKERRQ(ierr); 5090 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5091 if (ibs != mbs) { /* need to convert to SEQAIJ to extract any submatrix with is_R_local */ 5092 if (matis->A == pcbddc->local_mat) { 5093 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5094 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5095 } else { 5096 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5097 } 5098 } else if (issbaij) { /* need to convert to BAIJ to get offdiagonal blocks */ 5099 if (matis->A == pcbddc->local_mat) { 5100 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 5101 ierr = MatConvert(matis->A,MATSEQBAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5102 } else { 5103 ierr = MatConvert(pcbddc->local_mat,MATSEQBAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 5104 } 5105 } 5106 /* extract A_RR */ 5107 if (reuse_neumann_solver) { 5108 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5109 5110 if (pcbddc->dbg_flag) { /* we need A_RR to test the solver later */ 5111 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5112 if (reuse_solver->benign_n) { /* we are not using the explicit change of basis on the pressures */ 5113 ierr = PCBDDCBenignProject(pc,pcbddc->is_R_local,pcbddc->is_R_local,&A_RR);CHKERRQ(ierr); 5114 } else { 5115 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,MAT_INITIAL_MATRIX,&A_RR);CHKERRQ(ierr); 5116 } 5117 } else { 5118 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5119 ierr = PCGetOperators(reuse_solver->correction_solver,&A_RR,NULL);CHKERRQ(ierr); 5120 ierr = PetscObjectReference((PetscObject)A_RR);CHKERRQ(ierr); 5121 } 5122 } else { /* we have to build the neumann solver, so we need to extract the relevant matrix */ 5123 ierr = MatCreateSubMatrix(pcbddc->local_mat,pcbddc->is_R_local,pcbddc->is_R_local,reuse,&A_RR);CHKERRQ(ierr); 5124 } 5125 if (pcbddc->local_mat->symmetric_set) { 5126 ierr = MatSetOption(A_RR,MAT_SYMMETRIC,pcbddc->local_mat->symmetric_set);CHKERRQ(ierr); 5127 } 5128 if (!pcbddc->ksp_R) { /* create object if not present */ 5129 ierr = KSPCreate(PETSC_COMM_SELF,&pcbddc->ksp_R);CHKERRQ(ierr); 5130 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->ksp_R,(PetscObject)pc,1);CHKERRQ(ierr); 5131 /* default */ 5132 ierr = KSPSetType(pcbddc->ksp_R,KSPPREONLY);CHKERRQ(ierr); 5133 ierr = KSPSetOptionsPrefix(pcbddc->ksp_R,neu_prefix);CHKERRQ(ierr); 5134 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5135 ierr = PetscObjectTypeCompare((PetscObject)A_RR,MATSEQSBAIJ,&issbaij);CHKERRQ(ierr); 5136 if (issbaij) { 5137 ierr = PCSetType(pc_temp,PCCHOLESKY);CHKERRQ(ierr); 5138 } else { 5139 ierr = PCSetType(pc_temp,PCLU);CHKERRQ(ierr); 5140 } 5141 /* Allow user's customization */ 5142 ierr = KSPSetFromOptions(pcbddc->ksp_R);CHKERRQ(ierr); 5143 } 5144 /* umfpack interface has a bug when matrix dimension is zero. TODO solve from umfpack interface */ 5145 if (!n_R) { 5146 ierr = KSPGetPC(pcbddc->ksp_R,&pc_temp);CHKERRQ(ierr); 5147 ierr = PCSetType(pc_temp,PCNONE);CHKERRQ(ierr); 5148 } 5149 ierr = KSPSetOperators(pcbddc->ksp_R,A_RR,A_RR);CHKERRQ(ierr); 5150 /* Reuse solver if it is present */ 5151 if (reuse_neumann_solver) { 5152 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5153 5154 ierr = KSPSetPC(pcbddc->ksp_R,reuse_solver->correction_solver);CHKERRQ(ierr); 5155 } 5156 /* Set Up KSP for Neumann problem of BDDC */ 5157 ierr = KSPSetUp(pcbddc->ksp_R);CHKERRQ(ierr); 5158 } 5159 5160 if (pcbddc->dbg_flag) { 5161 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5162 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5163 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 5164 } 5165 5166 /* adapt Dirichlet and Neumann solvers if a nullspace correction has been requested */ 5167 check_corr = PETSC_FALSE; 5168 if (pcbddc->NullSpace_corr[0]) { 5169 ierr = PCBDDCSetUseExactDirichlet(pc,PETSC_FALSE);CHKERRQ(ierr); 5170 } 5171 if (dirichlet && pcbddc->NullSpace_corr[0] && !pcbddc->switch_static) { 5172 check_corr = PETSC_TRUE; 5173 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_TRUE,pcbddc->NullSpace_corr[1]);CHKERRQ(ierr); 5174 } 5175 if (neumann && pcbddc->NullSpace_corr[2]) { 5176 check_corr = PETSC_TRUE; 5177 ierr = PCBDDCNullSpaceAssembleCorrection(pc,PETSC_FALSE,pcbddc->NullSpace_corr[3]);CHKERRQ(ierr); 5178 } 5179 /* check Dirichlet and Neumann solvers */ 5180 if (pcbddc->dbg_flag) { 5181 if (dirichlet) { /* Dirichlet */ 5182 ierr = VecSetRandom(pcis->vec1_D,NULL);CHKERRQ(ierr); 5183 ierr = MatMult(pcis->A_II,pcis->vec1_D,pcis->vec2_D);CHKERRQ(ierr); 5184 ierr = KSPSolve(pcbddc->ksp_D,pcis->vec2_D,pcis->vec2_D);CHKERRQ(ierr); 5185 ierr = VecAXPY(pcis->vec1_D,m_one,pcis->vec2_D);CHKERRQ(ierr); 5186 ierr = VecNorm(pcis->vec1_D,NORM_INFINITY,&value);CHKERRQ(ierr); 5187 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Dirichlet solve (%s) = % 1.14e \n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_D))->prefix,value);CHKERRQ(ierr); 5188 if (check_corr) { 5189 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_TRUE);CHKERRQ(ierr); 5190 } 5191 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5192 } 5193 if (neumann) { /* Neumann */ 5194 ierr = VecSetRandom(pcbddc->vec1_R,NULL);CHKERRQ(ierr); 5195 ierr = MatMult(A_RR,pcbddc->vec1_R,pcbddc->vec2_R);CHKERRQ(ierr); 5196 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec2_R,pcbddc->vec2_R);CHKERRQ(ierr); 5197 ierr = VecAXPY(pcbddc->vec1_R,m_one,pcbddc->vec2_R);CHKERRQ(ierr); 5198 ierr = VecNorm(pcbddc->vec1_R,NORM_INFINITY,&value);CHKERRQ(ierr); 5199 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d infinity error for Neumann solve (%s) = % 1.14e\n",PetscGlobalRank,((PetscObject)(pcbddc->ksp_R))->prefix,value);CHKERRQ(ierr); 5200 if (check_corr) { 5201 ierr = PCBDDCNullSpaceCheckCorrection(pc,PETSC_FALSE);CHKERRQ(ierr); 5202 } 5203 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5204 } 5205 } 5206 /* free Neumann problem's matrix */ 5207 ierr = MatDestroy(&A_RR);CHKERRQ(ierr); 5208 PetscFunctionReturn(0); 5209 } 5210 5211 static PetscErrorCode PCBDDCSolveSubstructureCorrection(PC pc, Vec inout_B, Vec inout_D, PetscBool applytranspose) 5212 { 5213 PetscErrorCode ierr; 5214 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5215 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5216 PetscBool reuse_solver = sub_schurs ? ( sub_schurs->reuse_solver ? PETSC_TRUE : PETSC_FALSE ) : PETSC_FALSE; 5217 5218 PetscFunctionBegin; 5219 if (!reuse_solver) { 5220 ierr = VecSet(pcbddc->vec1_R,0.);CHKERRQ(ierr); 5221 } 5222 if (!pcbddc->switch_static) { 5223 if (applytranspose && pcbddc->local_auxmat1) { 5224 ierr = MatMultTranspose(pcbddc->local_auxmat2,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5225 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5226 } 5227 if (!reuse_solver) { 5228 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5229 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5230 } else { 5231 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5232 5233 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5234 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,inout_B,reuse_solver->rhs_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5235 } 5236 } else { 5237 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5238 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5239 ierr = VecScatterBegin(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5240 ierr = VecScatterEnd(pcbddc->R_to_D,inout_D,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5241 if (applytranspose && pcbddc->local_auxmat1) { 5242 ierr = MatMultTranspose(pcbddc->local_auxmat2,pcbddc->vec1_R,pcbddc->vec1_C);CHKERRQ(ierr); 5243 ierr = MatMultTransposeAdd(pcbddc->local_auxmat1,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5244 ierr = VecScatterBegin(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5245 ierr = VecScatterEnd(pcbddc->R_to_B,inout_B,pcbddc->vec1_R,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5246 } 5247 } 5248 if (!reuse_solver || pcbddc->switch_static) { 5249 if (applytranspose) { 5250 ierr = KSPSolveTranspose(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5251 } else { 5252 ierr = KSPSolve(pcbddc->ksp_R,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5253 } 5254 } else { 5255 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5256 5257 if (applytranspose) { 5258 ierr = MatFactorSolveSchurComplementTranspose(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5259 } else { 5260 ierr = MatFactorSolveSchurComplement(reuse_solver->F,reuse_solver->rhs_B,reuse_solver->sol_B);CHKERRQ(ierr); 5261 } 5262 } 5263 ierr = VecSet(inout_B,0.);CHKERRQ(ierr); 5264 if (!pcbddc->switch_static) { 5265 if (!reuse_solver) { 5266 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5267 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5268 } else { 5269 PCBDDCReuseSolvers reuse_solver = sub_schurs->reuse_solver; 5270 5271 ierr = VecScatterBegin(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5272 ierr = VecScatterEnd(reuse_solver->correction_scatter_B,reuse_solver->sol_B,inout_B,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5273 } 5274 if (!applytranspose && pcbddc->local_auxmat1) { 5275 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5276 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,inout_B,inout_B);CHKERRQ(ierr); 5277 } 5278 } else { 5279 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5280 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5281 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5282 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5283 if (!applytranspose && pcbddc->local_auxmat1) { 5284 ierr = MatMult(pcbddc->local_auxmat1,inout_B,pcbddc->vec1_C);CHKERRQ(ierr); 5285 ierr = MatMultAdd(pcbddc->local_auxmat2,pcbddc->vec1_C,pcbddc->vec1_R,pcbddc->vec1_R);CHKERRQ(ierr); 5286 } 5287 ierr = VecScatterBegin(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5288 ierr = VecScatterEnd(pcbddc->R_to_B,pcbddc->vec1_R,inout_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5289 ierr = VecScatterBegin(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5290 ierr = VecScatterEnd(pcbddc->R_to_D,pcbddc->vec1_R,inout_D,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5291 } 5292 PetscFunctionReturn(0); 5293 } 5294 5295 /* parameter apply transpose determines if the interface preconditioner should be applied transposed or not */ 5296 PetscErrorCode PCBDDCApplyInterfacePreconditioner(PC pc, PetscBool applytranspose) 5297 { 5298 PetscErrorCode ierr; 5299 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5300 PC_IS* pcis = (PC_IS*) (pc->data); 5301 const PetscScalar zero = 0.0; 5302 5303 PetscFunctionBegin; 5304 PetscBool ss = PETSC_FALSE; 5305 ierr = PetscOptionsGetBool(NULL,NULL,"-swap",&ss,NULL);CHKERRQ(ierr); 5306 if (ss) { 5307 Mat save_B = pcbddc->coarse_phi_B; 5308 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5309 pcbddc->coarse_psi_B = save_B; 5310 Mat save_D = pcbddc->coarse_phi_D; 5311 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5312 pcbddc->coarse_psi_D = save_D; 5313 } 5314 /* Application of PSI^T or PHI^T (depending on applytranspose, see comment above) */ 5315 if (!pcbddc->benign_apply_coarse_only) { 5316 if (applytranspose) { 5317 ierr = MatMultTranspose(pcbddc->coarse_phi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5318 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_phi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5319 } else { 5320 ierr = MatMultTranspose(pcbddc->coarse_psi_B,pcis->vec1_B,pcbddc->vec1_P);CHKERRQ(ierr); 5321 if (pcbddc->switch_static) { ierr = MatMultTransposeAdd(pcbddc->coarse_psi_D,pcis->vec1_D,pcbddc->vec1_P,pcbddc->vec1_P);CHKERRQ(ierr); } 5322 } 5323 } else { 5324 ierr = VecSet(pcbddc->vec1_P,zero);CHKERRQ(ierr); 5325 } 5326 5327 /* add p0 to the last value of vec1_P holding the coarse dof relative to p0 */ 5328 if (pcbddc->benign_n) { 5329 PetscScalar *array; 5330 PetscInt j; 5331 5332 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5333 for (j=0;j<pcbddc->benign_n;j++) array[pcbddc->local_primal_size-pcbddc->benign_n+j] += pcbddc->benign_p0[j]; 5334 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5335 } 5336 5337 /* start communications from local primal nodes to rhs of coarse solver */ 5338 ierr = VecSet(pcbddc->coarse_vec,zero);CHKERRQ(ierr); 5339 ierr = PCBDDCScatterCoarseDataBegin(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5340 ierr = PCBDDCScatterCoarseDataEnd(pc,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5341 5342 /* Coarse solution -> rhs and sol updated inside PCBDDCScattarCoarseDataBegin/End */ 5343 if (pcbddc->coarse_ksp) { 5344 Mat coarse_mat; 5345 Vec rhs,sol; 5346 MatNullSpace nullsp; 5347 PetscBool isbddc = PETSC_FALSE; 5348 5349 if (pcbddc->benign_have_null) { 5350 PC coarse_pc; 5351 5352 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5353 ierr = PetscObjectTypeCompare((PetscObject)coarse_pc,PCBDDC,&isbddc);CHKERRQ(ierr); 5354 /* we need to propagate to coarser levels the need for a possible benign correction */ 5355 if (isbddc && pcbddc->benign_apply_coarse_only && !pcbddc->benign_skip_correction) { 5356 PC_BDDC* coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5357 coarsepcbddc->benign_skip_correction = PETSC_FALSE; 5358 coarsepcbddc->benign_apply_coarse_only = PETSC_TRUE; 5359 } 5360 } 5361 ierr = KSPGetRhs(pcbddc->coarse_ksp,&rhs);CHKERRQ(ierr); 5362 ierr = KSPGetSolution(pcbddc->coarse_ksp,&sol);CHKERRQ(ierr); 5363 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 5364 ierr = MatGetNullSpace(coarse_mat,&nullsp);CHKERRQ(ierr); 5365 if (nullsp) { 5366 ierr = MatNullSpaceRemove(nullsp,rhs);CHKERRQ(ierr); 5367 } 5368 if (applytranspose) { 5369 if (pcbddc->benign_apply_coarse_only) SETERRQ(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),PETSC_ERR_SUP,"Not yet implemented"); 5370 ierr = KSPSolveTranspose(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5371 } else { 5372 if (pcbddc->benign_apply_coarse_only && isbddc) { /* need just to apply the coarse preconditioner during presolve */ 5373 PC coarse_pc; 5374 5375 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5376 ierr = PCPreSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5377 ierr = PCBDDCBenignRemoveInterior(coarse_pc,rhs,sol);CHKERRQ(ierr); 5378 ierr = PCPostSolve(coarse_pc,pcbddc->coarse_ksp);CHKERRQ(ierr); 5379 } else { 5380 ierr = KSPSolve(pcbddc->coarse_ksp,rhs,sol);CHKERRQ(ierr); 5381 } 5382 } 5383 /* we don't need the benign correction at coarser levels anymore */ 5384 if (pcbddc->benign_have_null && isbddc) { 5385 PC coarse_pc; 5386 PC_BDDC* coarsepcbddc; 5387 5388 ierr = KSPGetPC(pcbddc->coarse_ksp,&coarse_pc);CHKERRQ(ierr); 5389 coarsepcbddc = (PC_BDDC*)(coarse_pc->data); 5390 coarsepcbddc->benign_skip_correction = PETSC_TRUE; 5391 coarsepcbddc->benign_apply_coarse_only = PETSC_FALSE; 5392 } 5393 if (nullsp) { 5394 ierr = MatNullSpaceRemove(nullsp,sol);CHKERRQ(ierr); 5395 } 5396 } 5397 5398 /* Local solution on R nodes */ 5399 if (pcis->n && !pcbddc->benign_apply_coarse_only) { 5400 ierr = PCBDDCSolveSubstructureCorrection(pc,pcis->vec1_B,pcis->vec1_D,applytranspose);CHKERRQ(ierr); 5401 } 5402 /* communications from coarse sol to local primal nodes */ 5403 ierr = PCBDDCScatterCoarseDataBegin(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5404 ierr = PCBDDCScatterCoarseDataEnd(pc,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 5405 5406 /* Sum contributions from the two levels */ 5407 if (!pcbddc->benign_apply_coarse_only) { 5408 if (applytranspose) { 5409 ierr = MatMultAdd(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5410 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_psi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5411 } else { 5412 ierr = MatMultAdd(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 5413 if (pcbddc->switch_static) { ierr = MatMultAdd(pcbddc->coarse_phi_D,pcbddc->vec1_P,pcis->vec1_D,pcis->vec1_D);CHKERRQ(ierr); } 5414 } 5415 /* store p0 */ 5416 if (pcbddc->benign_n) { 5417 PetscScalar *array; 5418 PetscInt j; 5419 5420 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5421 for (j=0;j<pcbddc->benign_n;j++) pcbddc->benign_p0[j] = array[pcbddc->local_primal_size-pcbddc->benign_n+j]; 5422 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 5423 } 5424 } else { /* expand the coarse solution */ 5425 if (applytranspose) { 5426 ierr = MatMult(pcbddc->coarse_psi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5427 } else { 5428 ierr = MatMult(pcbddc->coarse_phi_B,pcbddc->vec1_P,pcis->vec1_B);CHKERRQ(ierr); 5429 } 5430 } 5431 if (ss) { 5432 Mat save_B = pcbddc->coarse_phi_B; 5433 pcbddc->coarse_phi_B = pcbddc->coarse_psi_B; 5434 pcbddc->coarse_psi_B = save_B; 5435 Mat save_D = pcbddc->coarse_phi_D; 5436 pcbddc->coarse_phi_D = pcbddc->coarse_psi_D; 5437 pcbddc->coarse_psi_D = save_D; 5438 } 5439 PetscFunctionReturn(0); 5440 } 5441 5442 PetscErrorCode PCBDDCScatterCoarseDataBegin(PC pc,InsertMode imode, ScatterMode smode) 5443 { 5444 PetscErrorCode ierr; 5445 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5446 PetscScalar *array; 5447 Vec from,to; 5448 5449 PetscFunctionBegin; 5450 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5451 from = pcbddc->coarse_vec; 5452 to = pcbddc->vec1_P; 5453 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5454 Vec tvec; 5455 5456 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5457 ierr = VecResetArray(tvec);CHKERRQ(ierr); 5458 ierr = KSPGetSolution(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5459 ierr = VecGetArray(tvec,&array);CHKERRQ(ierr); 5460 ierr = VecPlaceArray(from,array);CHKERRQ(ierr); 5461 ierr = VecRestoreArray(tvec,&array);CHKERRQ(ierr); 5462 } 5463 } else { /* from local to global -> put data in coarse right hand side */ 5464 from = pcbddc->vec1_P; 5465 to = pcbddc->coarse_vec; 5466 } 5467 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5468 PetscFunctionReturn(0); 5469 } 5470 5471 PetscErrorCode PCBDDCScatterCoarseDataEnd(PC pc, InsertMode imode, ScatterMode smode) 5472 { 5473 PetscErrorCode ierr; 5474 PC_BDDC* pcbddc = (PC_BDDC*)(pc->data); 5475 PetscScalar *array; 5476 Vec from,to; 5477 5478 PetscFunctionBegin; 5479 if (smode == SCATTER_REVERSE) { /* from global to local -> get data from coarse solution */ 5480 from = pcbddc->coarse_vec; 5481 to = pcbddc->vec1_P; 5482 } else { /* from local to global -> put data in coarse right hand side */ 5483 from = pcbddc->vec1_P; 5484 to = pcbddc->coarse_vec; 5485 } 5486 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,from,to,imode,smode);CHKERRQ(ierr); 5487 if (smode == SCATTER_FORWARD) { 5488 if (pcbddc->coarse_ksp) { /* get array from coarse processes */ 5489 Vec tvec; 5490 5491 ierr = KSPGetRhs(pcbddc->coarse_ksp,&tvec);CHKERRQ(ierr); 5492 ierr = VecGetArray(to,&array);CHKERRQ(ierr); 5493 ierr = VecPlaceArray(tvec,array);CHKERRQ(ierr); 5494 ierr = VecRestoreArray(to,&array);CHKERRQ(ierr); 5495 } 5496 } else { 5497 if (pcbddc->coarse_ksp) { /* restore array of pcbddc->coarse_vec */ 5498 ierr = VecResetArray(from);CHKERRQ(ierr); 5499 } 5500 } 5501 PetscFunctionReturn(0); 5502 } 5503 5504 /* uncomment for testing purposes */ 5505 /* #define PETSC_MISSING_LAPACK_GESVD 1 */ 5506 PetscErrorCode PCBDDCConstraintsSetUp(PC pc) 5507 { 5508 PetscErrorCode ierr; 5509 PC_IS* pcis = (PC_IS*)(pc->data); 5510 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 5511 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 5512 /* one and zero */ 5513 PetscScalar one=1.0,zero=0.0; 5514 /* space to store constraints and their local indices */ 5515 PetscScalar *constraints_data; 5516 PetscInt *constraints_idxs,*constraints_idxs_B; 5517 PetscInt *constraints_idxs_ptr,*constraints_data_ptr; 5518 PetscInt *constraints_n; 5519 /* iterators */ 5520 PetscInt i,j,k,total_counts,total_counts_cc,cum; 5521 /* BLAS integers */ 5522 PetscBLASInt lwork,lierr; 5523 PetscBLASInt Blas_N,Blas_M,Blas_K,Blas_one=1; 5524 PetscBLASInt Blas_LDA,Blas_LDB,Blas_LDC; 5525 /* reuse */ 5526 PetscInt olocal_primal_size,olocal_primal_size_cc; 5527 PetscInt *olocal_primal_ref_node,*olocal_primal_ref_mult; 5528 /* change of basis */ 5529 PetscBool qr_needed; 5530 PetscBT change_basis,qr_needed_idx; 5531 /* auxiliary stuff */ 5532 PetscInt *nnz,*is_indices; 5533 PetscInt ncc; 5534 /* some quantities */ 5535 PetscInt n_vertices,total_primal_vertices,valid_constraints; 5536 PetscInt size_of_constraint,max_size_of_constraint=0,max_constraints,temp_constraints; 5537 5538 PetscFunctionBegin; 5539 /* Destroy Mat objects computed previously */ 5540 ierr = MatDestroy(&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 5541 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 5542 ierr = MatDestroy(&pcbddc->switch_static_change);CHKERRQ(ierr); 5543 /* save info on constraints from previous setup (if any) */ 5544 olocal_primal_size = pcbddc->local_primal_size; 5545 olocal_primal_size_cc = pcbddc->local_primal_size_cc; 5546 ierr = PetscMalloc2(olocal_primal_size_cc,&olocal_primal_ref_node,olocal_primal_size_cc,&olocal_primal_ref_mult);CHKERRQ(ierr); 5547 ierr = PetscMemcpy(olocal_primal_ref_node,pcbddc->local_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5548 ierr = PetscMemcpy(olocal_primal_ref_mult,pcbddc->local_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt));CHKERRQ(ierr); 5549 ierr = PetscFree2(pcbddc->local_primal_ref_node,pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 5550 ierr = PetscFree(pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 5551 5552 if (!pcbddc->adaptive_selection) { 5553 IS ISForVertices,*ISForFaces,*ISForEdges; 5554 MatNullSpace nearnullsp; 5555 const Vec *nearnullvecs; 5556 Vec *localnearnullsp; 5557 PetscScalar *array; 5558 PetscInt n_ISForFaces,n_ISForEdges,nnsp_size; 5559 PetscBool nnsp_has_cnst; 5560 /* LAPACK working arrays for SVD or POD */ 5561 PetscBool skip_lapack,boolforchange; 5562 PetscScalar *work; 5563 PetscReal *singular_vals; 5564 #if defined(PETSC_USE_COMPLEX) 5565 PetscReal *rwork; 5566 #endif 5567 #if defined(PETSC_MISSING_LAPACK_GESVD) 5568 PetscScalar *temp_basis,*correlation_mat; 5569 #else 5570 PetscBLASInt dummy_int=1; 5571 PetscScalar dummy_scalar=1.; 5572 #endif 5573 5574 /* Get index sets for faces, edges and vertices from graph */ 5575 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,&n_ISForFaces,&ISForFaces,&n_ISForEdges,&ISForEdges,&ISForVertices);CHKERRQ(ierr); 5576 /* print some info */ 5577 if (pcbddc->dbg_flag && (!pcbddc->sub_schurs || pcbddc->sub_schurs_rebuild)) { 5578 PetscInt nv; 5579 5580 ierr = PCBDDCGraphASCIIView(pcbddc->mat_graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 5581 ierr = ISGetSize(ISForVertices,&nv);CHKERRQ(ierr); 5582 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 5584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 5585 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,n_ISForEdges,pcbddc->use_edges);CHKERRQ(ierr); 5586 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,n_ISForFaces,pcbddc->use_faces);CHKERRQ(ierr); 5587 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 5588 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 5589 } 5590 5591 /* free unneeded index sets */ 5592 if (!pcbddc->use_vertices) { 5593 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5594 } 5595 if (!pcbddc->use_edges) { 5596 for (i=0;i<n_ISForEdges;i++) { 5597 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5598 } 5599 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5600 n_ISForEdges = 0; 5601 } 5602 if (!pcbddc->use_faces) { 5603 for (i=0;i<n_ISForFaces;i++) { 5604 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5605 } 5606 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5607 n_ISForFaces = 0; 5608 } 5609 5610 /* check if near null space is attached to global mat */ 5611 ierr = MatGetNearNullSpace(pc->pmat,&nearnullsp);CHKERRQ(ierr); 5612 if (nearnullsp) { 5613 ierr = MatNullSpaceGetVecs(nearnullsp,&nnsp_has_cnst,&nnsp_size,&nearnullvecs);CHKERRQ(ierr); 5614 /* remove any stored info */ 5615 ierr = MatNullSpaceDestroy(&pcbddc->onearnullspace);CHKERRQ(ierr); 5616 ierr = PetscFree(pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5617 /* store information for BDDC solver reuse */ 5618 ierr = PetscObjectReference((PetscObject)nearnullsp);CHKERRQ(ierr); 5619 pcbddc->onearnullspace = nearnullsp; 5620 ierr = PetscMalloc1(nnsp_size,&pcbddc->onearnullvecs_state);CHKERRQ(ierr); 5621 for (i=0;i<nnsp_size;i++) { 5622 ierr = PetscObjectStateGet((PetscObject)nearnullvecs[i],&pcbddc->onearnullvecs_state[i]);CHKERRQ(ierr); 5623 } 5624 } else { /* if near null space is not provided BDDC uses constants by default */ 5625 nnsp_size = 0; 5626 nnsp_has_cnst = PETSC_TRUE; 5627 } 5628 /* get max number of constraints on a single cc */ 5629 max_constraints = nnsp_size; 5630 if (nnsp_has_cnst) max_constraints++; 5631 5632 /* 5633 Evaluate maximum storage size needed by the procedure 5634 - Indices for connected component i stored at "constraints_idxs + constraints_idxs_ptr[i]" 5635 - Values for constraints on connected component i stored at "constraints_data + constraints_data_ptr[i]" 5636 There can be multiple constraints per connected component 5637 */ 5638 n_vertices = 0; 5639 if (ISForVertices) { 5640 ierr = ISGetSize(ISForVertices,&n_vertices);CHKERRQ(ierr); 5641 } 5642 ncc = n_vertices+n_ISForFaces+n_ISForEdges; 5643 ierr = PetscMalloc3(ncc+1,&constraints_idxs_ptr,ncc+1,&constraints_data_ptr,ncc,&constraints_n);CHKERRQ(ierr); 5644 5645 total_counts = n_ISForFaces+n_ISForEdges; 5646 total_counts *= max_constraints; 5647 total_counts += n_vertices; 5648 ierr = PetscBTCreate(total_counts,&change_basis);CHKERRQ(ierr); 5649 5650 total_counts = 0; 5651 max_size_of_constraint = 0; 5652 for (i=0;i<n_ISForEdges+n_ISForFaces;i++) { 5653 IS used_is; 5654 if (i<n_ISForEdges) { 5655 used_is = ISForEdges[i]; 5656 } else { 5657 used_is = ISForFaces[i-n_ISForEdges]; 5658 } 5659 ierr = ISGetSize(used_is,&j);CHKERRQ(ierr); 5660 total_counts += j; 5661 max_size_of_constraint = PetscMax(j,max_size_of_constraint); 5662 } 5663 ierr = PetscMalloc3(total_counts*max_constraints+n_vertices,&constraints_data,total_counts+n_vertices,&constraints_idxs,total_counts+n_vertices,&constraints_idxs_B);CHKERRQ(ierr); 5664 5665 /* get local part of global near null space vectors */ 5666 ierr = PetscMalloc1(nnsp_size,&localnearnullsp);CHKERRQ(ierr); 5667 for (k=0;k<nnsp_size;k++) { 5668 ierr = VecDuplicate(pcis->vec1_N,&localnearnullsp[k]);CHKERRQ(ierr); 5669 ierr = VecScatterBegin(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5670 ierr = VecScatterEnd(matis->rctx,nearnullvecs[k],localnearnullsp[k],INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 5671 } 5672 5673 /* whether or not to skip lapack calls */ 5674 skip_lapack = PETSC_TRUE; 5675 if (n_ISForFaces+n_ISForEdges && max_constraints > 1 && !pcbddc->use_nnsp_true) skip_lapack = PETSC_FALSE; 5676 5677 /* First we issue queries to allocate optimal workspace for LAPACKgesvd (or LAPACKsyev if SVD is missing) */ 5678 if (!skip_lapack) { 5679 PetscScalar temp_work; 5680 5681 #if defined(PETSC_MISSING_LAPACK_GESVD) 5682 /* Proper Orthogonal Decomposition (POD) using the snapshot method */ 5683 ierr = PetscMalloc1(max_constraints*max_constraints,&correlation_mat);CHKERRQ(ierr); 5684 ierr = PetscMalloc1(max_constraints,&singular_vals);CHKERRQ(ierr); 5685 ierr = PetscMalloc1(max_size_of_constraint*max_constraints,&temp_basis);CHKERRQ(ierr); 5686 #if defined(PETSC_USE_COMPLEX) 5687 ierr = PetscMalloc1(3*max_constraints,&rwork);CHKERRQ(ierr); 5688 #endif 5689 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5690 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 5691 ierr = PetscBLASIntCast(max_constraints,&Blas_LDA);CHKERRQ(ierr); 5692 lwork = -1; 5693 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5694 #if !defined(PETSC_USE_COMPLEX) 5695 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,&lierr)); 5696 #else 5697 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,&temp_work,&lwork,rwork,&lierr)); 5698 #endif 5699 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5700 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to SYEV Lapack routine %d",(int)lierr); 5701 #else /* on missing GESVD */ 5702 /* SVD */ 5703 PetscInt max_n,min_n; 5704 max_n = max_size_of_constraint; 5705 min_n = max_constraints; 5706 if (max_size_of_constraint < max_constraints) { 5707 min_n = max_size_of_constraint; 5708 max_n = max_constraints; 5709 } 5710 ierr = PetscMalloc1(min_n,&singular_vals);CHKERRQ(ierr); 5711 #if defined(PETSC_USE_COMPLEX) 5712 ierr = PetscMalloc1(5*min_n,&rwork);CHKERRQ(ierr); 5713 #endif 5714 /* now we evaluate the optimal workspace using query with lwork=-1 */ 5715 lwork = -1; 5716 ierr = PetscBLASIntCast(max_n,&Blas_M);CHKERRQ(ierr); 5717 ierr = PetscBLASIntCast(min_n,&Blas_N);CHKERRQ(ierr); 5718 ierr = PetscBLASIntCast(max_n,&Blas_LDA);CHKERRQ(ierr); 5719 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5720 #if !defined(PETSC_USE_COMPLEX) 5721 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,&lierr)); 5722 #else 5723 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,&constraints_data[0],&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,&temp_work,&lwork,rwork,&lierr)); 5724 #endif 5725 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5726 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GESVD Lapack routine %d",(int)lierr); 5727 #endif /* on missing GESVD */ 5728 /* Allocate optimal workspace */ 5729 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(temp_work),&lwork);CHKERRQ(ierr); 5730 ierr = PetscMalloc1(lwork,&work);CHKERRQ(ierr); 5731 } 5732 /* Now we can loop on constraining sets */ 5733 total_counts = 0; 5734 constraints_idxs_ptr[0] = 0; 5735 constraints_data_ptr[0] = 0; 5736 /* vertices */ 5737 if (n_vertices) { 5738 ierr = ISGetIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5739 ierr = PetscMemcpy(constraints_idxs,is_indices,n_vertices*sizeof(PetscInt));CHKERRQ(ierr); 5740 for (i=0;i<n_vertices;i++) { 5741 constraints_n[total_counts] = 1; 5742 constraints_data[total_counts] = 1.0; 5743 constraints_idxs_ptr[total_counts+1] = constraints_idxs_ptr[total_counts]+1; 5744 constraints_data_ptr[total_counts+1] = constraints_data_ptr[total_counts]+1; 5745 total_counts++; 5746 } 5747 ierr = ISRestoreIndices(ISForVertices,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5748 n_vertices = total_counts; 5749 } 5750 5751 /* edges and faces */ 5752 total_counts_cc = total_counts; 5753 for (ncc=0;ncc<n_ISForEdges+n_ISForFaces;ncc++) { 5754 IS used_is; 5755 PetscBool idxs_copied = PETSC_FALSE; 5756 5757 if (ncc<n_ISForEdges) { 5758 used_is = ISForEdges[ncc]; 5759 boolforchange = pcbddc->use_change_of_basis; /* change or not the basis on the edge */ 5760 } else { 5761 used_is = ISForFaces[ncc-n_ISForEdges]; 5762 boolforchange = (PetscBool)(pcbddc->use_change_of_basis && pcbddc->use_change_on_faces); /* change or not the basis on the face */ 5763 } 5764 temp_constraints = 0; /* zero the number of constraints I have on this conn comp */ 5765 5766 ierr = ISGetSize(used_is,&size_of_constraint);CHKERRQ(ierr); 5767 ierr = ISGetIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5768 /* change of basis should not be performed on local periodic nodes */ 5769 if (pcbddc->mat_graph->mirrors && pcbddc->mat_graph->mirrors[is_indices[0]]) boolforchange = PETSC_FALSE; 5770 if (nnsp_has_cnst) { 5771 PetscScalar quad_value; 5772 5773 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5774 idxs_copied = PETSC_TRUE; 5775 5776 if (!pcbddc->use_nnsp_true) { 5777 quad_value = (PetscScalar)(1.0/PetscSqrtReal((PetscReal)size_of_constraint)); 5778 } else { 5779 quad_value = 1.0; 5780 } 5781 for (j=0;j<size_of_constraint;j++) { 5782 constraints_data[constraints_data_ptr[total_counts_cc]+j] = quad_value; 5783 } 5784 temp_constraints++; 5785 total_counts++; 5786 } 5787 for (k=0;k<nnsp_size;k++) { 5788 PetscReal real_value; 5789 PetscScalar *ptr_to_data; 5790 5791 ierr = VecGetArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5792 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]+temp_constraints*size_of_constraint]; 5793 for (j=0;j<size_of_constraint;j++) { 5794 ptr_to_data[j] = array[is_indices[j]]; 5795 } 5796 ierr = VecRestoreArrayRead(localnearnullsp[k],(const PetscScalar**)&array);CHKERRQ(ierr); 5797 /* check if array is null on the connected component */ 5798 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5799 PetscStackCallBLAS("BLASasum",real_value = BLASasum_(&Blas_N,ptr_to_data,&Blas_one)); 5800 if (real_value > 0.0) { /* keep indices and values */ 5801 temp_constraints++; 5802 total_counts++; 5803 if (!idxs_copied) { 5804 ierr = PetscMemcpy(constraints_idxs + constraints_idxs_ptr[total_counts_cc],is_indices,size_of_constraint*sizeof(PetscInt));CHKERRQ(ierr); 5805 idxs_copied = PETSC_TRUE; 5806 } 5807 } 5808 } 5809 ierr = ISRestoreIndices(used_is,(const PetscInt**)&is_indices);CHKERRQ(ierr); 5810 valid_constraints = temp_constraints; 5811 if (!pcbddc->use_nnsp_true && temp_constraints) { 5812 if (temp_constraints == 1) { /* just normalize the constraint */ 5813 PetscScalar norm,*ptr_to_data; 5814 5815 ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5816 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5817 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,ptr_to_data,&Blas_one,ptr_to_data,&Blas_one)); 5818 norm = 1.0/PetscSqrtReal(PetscRealPart(norm)); 5819 PetscStackCallBLAS("BLASscal",BLASscal_(&Blas_N,&norm,ptr_to_data,&Blas_one)); 5820 } else { /* perform SVD */ 5821 PetscReal tol = 1.0e-8; /* tolerance for retaining eigenmodes */ 5822 PetscScalar *ptr_to_data = &constraints_data[constraints_data_ptr[total_counts_cc]]; 5823 5824 #if defined(PETSC_MISSING_LAPACK_GESVD) 5825 /* SVD: Y = U*S*V^H -> U (eigenvectors of Y*Y^H) = Y*V*(S)^\dag 5826 POD: Y^H*Y = V*D*V^H, D = S^H*S -> U = Y*V*D^(-1/2) 5827 -> When PETSC_USE_COMPLEX and PETSC_MISSING_LAPACK_GESVD are defined 5828 the constraints basis will differ (by a complex factor with absolute value equal to 1) 5829 from that computed using LAPACKgesvd 5830 -> This is due to a different computation of eigenvectors in LAPACKheev 5831 -> The quality of the POD-computed basis will be the same */ 5832 ierr = PetscMemzero(correlation_mat,temp_constraints*temp_constraints*sizeof(PetscScalar));CHKERRQ(ierr); 5833 /* Store upper triangular part of correlation matrix */ 5834 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 5835 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5836 for (j=0;j<temp_constraints;j++) { 5837 for (k=0;k<j+1;k++) { 5838 PetscStackCallBLAS("BLASdot",correlation_mat[j*temp_constraints+k] = BLASdot_(&Blas_N,ptr_to_data+k*size_of_constraint,&Blas_one,ptr_to_data+j*size_of_constraint,&Blas_one)); 5839 } 5840 } 5841 /* compute eigenvalues and eigenvectors of correlation matrix */ 5842 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5843 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDA);CHKERRQ(ierr); 5844 #if !defined(PETSC_USE_COMPLEX) 5845 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,&lierr)); 5846 #else 5847 PetscStackCallBLAS("LAPACKsyev",LAPACKsyev_("V","U",&Blas_N,correlation_mat,&Blas_LDA,singular_vals,work,&lwork,rwork,&lierr)); 5848 #endif 5849 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5850 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in SYEV Lapack routine %d",(int)lierr); 5851 /* retain eigenvalues greater than tol: note that LAPACKsyev gives eigs in ascending order */ 5852 j = 0; 5853 while (j < temp_constraints && singular_vals[j] < tol) j++; 5854 total_counts = total_counts-j; 5855 valid_constraints = temp_constraints-j; 5856 /* scale and copy POD basis into used quadrature memory */ 5857 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5858 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5859 ierr = PetscBLASIntCast(temp_constraints,&Blas_K);CHKERRQ(ierr); 5860 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5861 ierr = PetscBLASIntCast(temp_constraints,&Blas_LDB);CHKERRQ(ierr); 5862 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 5863 if (j<temp_constraints) { 5864 PetscInt ii; 5865 for (k=j;k<temp_constraints;k++) singular_vals[k] = 1.0/PetscSqrtReal(singular_vals[k]); 5866 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5867 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,ptr_to_data,&Blas_LDA,correlation_mat,&Blas_LDB,&zero,temp_basis,&Blas_LDC)); 5868 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5869 for (k=0;k<temp_constraints-j;k++) { 5870 for (ii=0;ii<size_of_constraint;ii++) { 5871 ptr_to_data[k*size_of_constraint+ii] = singular_vals[temp_constraints-1-k]*temp_basis[(temp_constraints-1-k)*size_of_constraint+ii]; 5872 } 5873 } 5874 } 5875 #else /* on missing GESVD */ 5876 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 5877 ierr = PetscBLASIntCast(temp_constraints,&Blas_N);CHKERRQ(ierr); 5878 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 5879 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 5880 #if !defined(PETSC_USE_COMPLEX) 5881 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,&lierr)); 5882 #else 5883 PetscStackCallBLAS("LAPACKgesvd",LAPACKgesvd_("O","N",&Blas_M,&Blas_N,ptr_to_data,&Blas_LDA,singular_vals,&dummy_scalar,&dummy_int,&dummy_scalar,&dummy_int,work,&lwork,rwork,&lierr)); 5884 #endif 5885 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GESVD Lapack routine %d",(int)lierr); 5886 ierr = PetscFPTrapPop();CHKERRQ(ierr); 5887 /* retain eigenvalues greater than tol: note that LAPACKgesvd gives eigs in descending order */ 5888 k = temp_constraints; 5889 if (k > size_of_constraint) k = size_of_constraint; 5890 j = 0; 5891 while (j < k && singular_vals[k-j-1] < tol) j++; 5892 valid_constraints = k-j; 5893 total_counts = total_counts-temp_constraints+valid_constraints; 5894 #endif /* on missing GESVD */ 5895 } 5896 } 5897 /* update pointers information */ 5898 if (valid_constraints) { 5899 constraints_n[total_counts_cc] = valid_constraints; 5900 constraints_idxs_ptr[total_counts_cc+1] = constraints_idxs_ptr[total_counts_cc]+size_of_constraint; 5901 constraints_data_ptr[total_counts_cc+1] = constraints_data_ptr[total_counts_cc]+size_of_constraint*valid_constraints; 5902 /* set change_of_basis flag */ 5903 if (boolforchange) { 5904 PetscBTSet(change_basis,total_counts_cc); 5905 } 5906 total_counts_cc++; 5907 } 5908 } 5909 /* free workspace */ 5910 if (!skip_lapack) { 5911 ierr = PetscFree(work);CHKERRQ(ierr); 5912 #if defined(PETSC_USE_COMPLEX) 5913 ierr = PetscFree(rwork);CHKERRQ(ierr); 5914 #endif 5915 ierr = PetscFree(singular_vals);CHKERRQ(ierr); 5916 #if defined(PETSC_MISSING_LAPACK_GESVD) 5917 ierr = PetscFree(correlation_mat);CHKERRQ(ierr); 5918 ierr = PetscFree(temp_basis);CHKERRQ(ierr); 5919 #endif 5920 } 5921 for (k=0;k<nnsp_size;k++) { 5922 ierr = VecDestroy(&localnearnullsp[k]);CHKERRQ(ierr); 5923 } 5924 ierr = PetscFree(localnearnullsp);CHKERRQ(ierr); 5925 /* free index sets of faces, edges and vertices */ 5926 for (i=0;i<n_ISForFaces;i++) { 5927 ierr = ISDestroy(&ISForFaces[i]);CHKERRQ(ierr); 5928 } 5929 if (n_ISForFaces) { 5930 ierr = PetscFree(ISForFaces);CHKERRQ(ierr); 5931 } 5932 for (i=0;i<n_ISForEdges;i++) { 5933 ierr = ISDestroy(&ISForEdges[i]);CHKERRQ(ierr); 5934 } 5935 if (n_ISForEdges) { 5936 ierr = PetscFree(ISForEdges);CHKERRQ(ierr); 5937 } 5938 ierr = ISDestroy(&ISForVertices);CHKERRQ(ierr); 5939 } else { 5940 PCBDDCSubSchurs sub_schurs = pcbddc->sub_schurs; 5941 5942 total_counts = 0; 5943 n_vertices = 0; 5944 if (sub_schurs->is_vertices && pcbddc->use_vertices) { 5945 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 5946 } 5947 max_constraints = 0; 5948 total_counts_cc = 0; 5949 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5950 total_counts += pcbddc->adaptive_constraints_n[i]; 5951 if (pcbddc->adaptive_constraints_n[i]) total_counts_cc++; 5952 max_constraints = PetscMax(max_constraints,pcbddc->adaptive_constraints_n[i]); 5953 } 5954 constraints_idxs_ptr = pcbddc->adaptive_constraints_idxs_ptr; 5955 constraints_data_ptr = pcbddc->adaptive_constraints_data_ptr; 5956 constraints_idxs = pcbddc->adaptive_constraints_idxs; 5957 constraints_data = pcbddc->adaptive_constraints_data; 5958 /* constraints_n differs from pcbddc->adaptive_constraints_n */ 5959 ierr = PetscMalloc1(total_counts_cc,&constraints_n);CHKERRQ(ierr); 5960 total_counts_cc = 0; 5961 for (i=0;i<sub_schurs->n_subs+n_vertices;i++) { 5962 if (pcbddc->adaptive_constraints_n[i]) { 5963 constraints_n[total_counts_cc++] = pcbddc->adaptive_constraints_n[i]; 5964 } 5965 } 5966 #if 0 5967 printf("Found %d totals (%d)\n",total_counts_cc,total_counts); 5968 for (i=0;i<total_counts_cc;i++) { 5969 printf("const %d, start %d",i,constraints_idxs_ptr[i]); 5970 printf(" end %d:\n",constraints_idxs_ptr[i+1]); 5971 for (j=constraints_idxs_ptr[i];j<constraints_idxs_ptr[i+1];j++) { 5972 printf(" %d",constraints_idxs[j]); 5973 } 5974 printf("\n"); 5975 printf("number of cc: %d\n",constraints_n[i]); 5976 } 5977 for (i=0;i<n_vertices;i++) { 5978 PetscPrintf(PETSC_COMM_SELF,"[%d] vertex %d, n %d\n",PetscGlobalRank,i,pcbddc->adaptive_constraints_n[i]); 5979 } 5980 for (i=0;i<sub_schurs->n_subs;i++) { 5981 PetscPrintf(PETSC_COMM_SELF,"[%d] sub %d, edge %d, n %d\n",PetscGlobalRank,i,(PetscBool)PetscBTLookup(sub_schurs->is_edge,i),pcbddc->adaptive_constraints_n[i+n_vertices]); 5982 } 5983 #endif 5984 5985 max_size_of_constraint = 0; 5986 for (i=0;i<total_counts_cc;i++) max_size_of_constraint = PetscMax(max_size_of_constraint,constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]); 5987 ierr = PetscMalloc1(constraints_idxs_ptr[total_counts_cc],&constraints_idxs_B);CHKERRQ(ierr); 5988 /* Change of basis */ 5989 ierr = PetscBTCreate(total_counts_cc,&change_basis);CHKERRQ(ierr); 5990 if (pcbddc->use_change_of_basis) { 5991 for (i=0;i<sub_schurs->n_subs;i++) { 5992 if (PetscBTLookup(sub_schurs->is_edge,i) || pcbddc->use_change_on_faces) { 5993 ierr = PetscBTSet(change_basis,i+n_vertices);CHKERRQ(ierr); 5994 } 5995 } 5996 } 5997 } 5998 pcbddc->local_primal_size = total_counts; 5999 ierr = PetscMalloc1(pcbddc->local_primal_size+pcbddc->benign_n,&pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6000 6001 /* map constraints_idxs in boundary numbering */ 6002 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,constraints_idxs_ptr[total_counts_cc],constraints_idxs,&i,constraints_idxs_B);CHKERRQ(ierr); 6003 if (i != constraints_idxs_ptr[total_counts_cc]) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for constraints indices %D != %D\n",constraints_idxs_ptr[total_counts_cc],i); 6004 6005 /* Create constraint matrix */ 6006 ierr = MatCreate(PETSC_COMM_SELF,&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6007 ierr = MatSetType(pcbddc->ConstraintMatrix,MATAIJ);CHKERRQ(ierr); 6008 ierr = MatSetSizes(pcbddc->ConstraintMatrix,pcbddc->local_primal_size,pcis->n,pcbddc->local_primal_size,pcis->n);CHKERRQ(ierr); 6009 6010 /* find primal_dofs: subdomain corners plus dofs selected as primal after change of basis */ 6011 /* determine if a QR strategy is needed for change of basis */ 6012 qr_needed = PETSC_FALSE; 6013 ierr = PetscBTCreate(total_counts_cc,&qr_needed_idx);CHKERRQ(ierr); 6014 total_primal_vertices=0; 6015 pcbddc->local_primal_size_cc = 0; 6016 for (i=0;i<total_counts_cc;i++) { 6017 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6018 if (size_of_constraint == 1 && pcbddc->mat_graph->custom_minimal_size) { 6019 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]]; 6020 pcbddc->local_primal_size_cc += 1; 6021 } else if (PetscBTLookup(change_basis,i)) { 6022 for (k=0;k<constraints_n[i];k++) { 6023 pcbddc->primal_indices_local_idxs[total_primal_vertices++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6024 } 6025 pcbddc->local_primal_size_cc += constraints_n[i]; 6026 if (constraints_n[i] > 1 || pcbddc->use_qr_single) { 6027 PetscBTSet(qr_needed_idx,i); 6028 qr_needed = PETSC_TRUE; 6029 } 6030 } else { 6031 pcbddc->local_primal_size_cc += 1; 6032 } 6033 } 6034 /* note that the local variable n_vertices used below stores the number of pointwise constraints */ 6035 pcbddc->n_vertices = total_primal_vertices; 6036 /* permute indices in order to have a sorted set of vertices */ 6037 ierr = PetscSortInt(total_primal_vertices,pcbddc->primal_indices_local_idxs);CHKERRQ(ierr); 6038 ierr = PetscMalloc2(pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_node,pcbddc->local_primal_size_cc+pcbddc->benign_n,&pcbddc->local_primal_ref_mult);CHKERRQ(ierr); 6039 ierr = PetscMemcpy(pcbddc->local_primal_ref_node,pcbddc->primal_indices_local_idxs,total_primal_vertices*sizeof(PetscInt));CHKERRQ(ierr); 6040 for (i=0;i<total_primal_vertices;i++) pcbddc->local_primal_ref_mult[i] = 1; 6041 6042 /* nonzero structure of constraint matrix */ 6043 /* and get reference dof for local constraints */ 6044 ierr = PetscMalloc1(pcbddc->local_primal_size,&nnz);CHKERRQ(ierr); 6045 for (i=0;i<total_primal_vertices;i++) nnz[i] = 1; 6046 6047 j = total_primal_vertices; 6048 total_counts = total_primal_vertices; 6049 cum = total_primal_vertices; 6050 for (i=n_vertices;i<total_counts_cc;i++) { 6051 if (!PetscBTLookup(change_basis,i)) { 6052 pcbddc->local_primal_ref_node[cum] = constraints_idxs[constraints_idxs_ptr[i]]; 6053 pcbddc->local_primal_ref_mult[cum] = constraints_n[i]; 6054 cum++; 6055 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6056 for (k=0;k<constraints_n[i];k++) { 6057 pcbddc->primal_indices_local_idxs[total_counts++] = constraints_idxs[constraints_idxs_ptr[i]+k]; 6058 nnz[j+k] = size_of_constraint; 6059 } 6060 j += constraints_n[i]; 6061 } 6062 } 6063 ierr = MatSeqAIJSetPreallocation(pcbddc->ConstraintMatrix,0,nnz);CHKERRQ(ierr); 6064 ierr = PetscFree(nnz);CHKERRQ(ierr); 6065 6066 /* set values in constraint matrix */ 6067 for (i=0;i<total_primal_vertices;i++) { 6068 ierr = MatSetValue(pcbddc->ConstraintMatrix,i,pcbddc->local_primal_ref_node[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 6069 } 6070 total_counts = total_primal_vertices; 6071 for (i=n_vertices;i<total_counts_cc;i++) { 6072 if (!PetscBTLookup(change_basis,i)) { 6073 PetscInt *cols; 6074 6075 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6076 cols = constraints_idxs+constraints_idxs_ptr[i]; 6077 for (k=0;k<constraints_n[i];k++) { 6078 PetscInt row = total_counts+k; 6079 PetscScalar *vals; 6080 6081 vals = constraints_data+constraints_data_ptr[i]+k*size_of_constraint; 6082 ierr = MatSetValues(pcbddc->ConstraintMatrix,1,&row,size_of_constraint,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6083 } 6084 total_counts += constraints_n[i]; 6085 } 6086 } 6087 /* assembling */ 6088 ierr = MatAssemblyBegin(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6089 ierr = MatAssemblyEnd(pcbddc->ConstraintMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6090 6091 /* 6092 ierr = PetscViewerPushFormat(PETSC_VIEWER_STDOUT_SELF,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 6093 ierr = MatView(pcbddc->ConstraintMatrix,(PetscViewer)0);CHKERRQ(ierr); 6094 ierr = PetscViewerPopFormat(PETSC_VIEWER_STDOUT_SELF);CHKERRQ(ierr); 6095 */ 6096 /* Create matrix for change of basis. We don't need it in case pcbddc->use_change_of_basis is FALSE */ 6097 if (pcbddc->use_change_of_basis) { 6098 /* dual and primal dofs on a single cc */ 6099 PetscInt dual_dofs,primal_dofs; 6100 /* working stuff for GEQRF */ 6101 PetscScalar *qr_basis,*qr_tau = NULL,*qr_work,lqr_work_t; 6102 PetscBLASInt lqr_work; 6103 /* working stuff for UNGQR */ 6104 PetscScalar *gqr_work,lgqr_work_t; 6105 PetscBLASInt lgqr_work; 6106 /* working stuff for TRTRS */ 6107 PetscScalar *trs_rhs; 6108 PetscBLASInt Blas_NRHS; 6109 /* pointers for values insertion into change of basis matrix */ 6110 PetscInt *start_rows,*start_cols; 6111 PetscScalar *start_vals; 6112 /* working stuff for values insertion */ 6113 PetscBT is_primal; 6114 PetscInt *aux_primal_numbering_B; 6115 /* matrix sizes */ 6116 PetscInt global_size,local_size; 6117 /* temporary change of basis */ 6118 Mat localChangeOfBasisMatrix; 6119 /* extra space for debugging */ 6120 PetscScalar *dbg_work; 6121 6122 /* local temporary change of basis acts on local interfaces -> dimension is n_B x n_B */ 6123 ierr = MatCreate(PETSC_COMM_SELF,&localChangeOfBasisMatrix);CHKERRQ(ierr); 6124 ierr = MatSetType(localChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6125 ierr = MatSetSizes(localChangeOfBasisMatrix,pcis->n,pcis->n,pcis->n,pcis->n);CHKERRQ(ierr); 6126 /* nonzeros for local mat */ 6127 ierr = PetscMalloc1(pcis->n,&nnz);CHKERRQ(ierr); 6128 if (!pcbddc->benign_change || pcbddc->fake_change) { 6129 for (i=0;i<pcis->n;i++) nnz[i]=1; 6130 } else { 6131 const PetscInt *ii; 6132 PetscInt n; 6133 PetscBool flg_row; 6134 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6135 for (i=0;i<n;i++) nnz[i] = ii[i+1]-ii[i]; 6136 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,NULL,&flg_row);CHKERRQ(ierr); 6137 } 6138 for (i=n_vertices;i<total_counts_cc;i++) { 6139 if (PetscBTLookup(change_basis,i)) { 6140 size_of_constraint = constraints_idxs_ptr[i+1]-constraints_idxs_ptr[i]; 6141 if (PetscBTLookup(qr_needed_idx,i)) { 6142 for (j=0;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = size_of_constraint; 6143 } else { 6144 nnz[constraints_idxs[constraints_idxs_ptr[i]]] = size_of_constraint; 6145 for (j=1;j<size_of_constraint;j++) nnz[constraints_idxs[constraints_idxs_ptr[i]+j]] = 2; 6146 } 6147 } 6148 } 6149 ierr = MatSeqAIJSetPreallocation(localChangeOfBasisMatrix,0,nnz);CHKERRQ(ierr); 6150 ierr = PetscFree(nnz);CHKERRQ(ierr); 6151 /* Set interior change in the matrix */ 6152 if (!pcbddc->benign_change || pcbddc->fake_change) { 6153 for (i=0;i<pcis->n;i++) { 6154 ierr = MatSetValue(localChangeOfBasisMatrix,i,i,1.0,INSERT_VALUES);CHKERRQ(ierr); 6155 } 6156 } else { 6157 const PetscInt *ii,*jj; 6158 PetscScalar *aa; 6159 PetscInt n; 6160 PetscBool flg_row; 6161 ierr = MatGetRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6162 ierr = MatSeqAIJGetArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6163 for (i=0;i<n;i++) { 6164 ierr = MatSetValues(localChangeOfBasisMatrix,1,&i,ii[i+1]-ii[i],jj+ii[i],aa+ii[i],INSERT_VALUES);CHKERRQ(ierr); 6165 } 6166 ierr = MatSeqAIJRestoreArray(pcbddc->benign_change,&aa);CHKERRQ(ierr); 6167 ierr = MatRestoreRowIJ(pcbddc->benign_change,0,PETSC_FALSE,PETSC_FALSE,&n,&ii,&jj,&flg_row);CHKERRQ(ierr); 6168 } 6169 6170 if (pcbddc->dbg_flag) { 6171 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 6172 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Checking change of basis computation for subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 6173 } 6174 6175 6176 /* Now we loop on the constraints which need a change of basis */ 6177 /* 6178 Change of basis matrix is evaluated similarly to the FIRST APPROACH in 6179 Klawonn and Widlund, Dual-primal FETI-DP methods for linear elasticity, (see Sect 6.2.1) 6180 6181 Basic blocks of change of basis matrix T computed by 6182 6183 - Using the following block transformation if there is only a primal dof on the cc (and -pc_bddc_use_qr_single is not specified) 6184 6185 | 1 0 ... 0 s_1/S | 6186 | 0 1 ... 0 s_2/S | 6187 | ... | 6188 | 0 ... 1 s_{n-1}/S | 6189 | -s_1/s_n ... -s_{n-1}/s_n s_n/S | 6190 6191 with S = \sum_{i=1}^n s_i^2 6192 NOTE: in the above example, the primal dof is the last one of the edge in LOCAL ordering 6193 in the current implementation, the primal dof is the first one of the edge in GLOBAL ordering 6194 6195 - QR decomposition of constraints otherwise 6196 */ 6197 if (qr_needed) { 6198 /* space to store Q */ 6199 ierr = PetscMalloc1(max_size_of_constraint*max_size_of_constraint,&qr_basis);CHKERRQ(ierr); 6200 /* array to store scaling factors for reflectors */ 6201 ierr = PetscMalloc1(max_constraints,&qr_tau);CHKERRQ(ierr); 6202 /* first we issue queries for optimal work */ 6203 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6204 ierr = PetscBLASIntCast(max_constraints,&Blas_N);CHKERRQ(ierr); 6205 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6206 lqr_work = -1; 6207 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,&lqr_work_t,&lqr_work,&lierr)); 6208 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to GEQRF Lapack routine %d",(int)lierr); 6209 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lqr_work_t),&lqr_work);CHKERRQ(ierr); 6210 ierr = PetscMalloc1((PetscInt)PetscRealPart(lqr_work_t),&qr_work);CHKERRQ(ierr); 6211 lgqr_work = -1; 6212 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_M);CHKERRQ(ierr); 6213 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_N);CHKERRQ(ierr); 6214 ierr = PetscBLASIntCast(max_constraints,&Blas_K);CHKERRQ(ierr); 6215 ierr = PetscBLASIntCast(max_size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6216 if (Blas_K>Blas_M) Blas_K=Blas_M; /* adjust just for computing optimal work */ 6217 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,&lgqr_work_t,&lgqr_work,&lierr)); 6218 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in query to ORGQR/UNGQR Lapack routine %d",(int)lierr); 6219 ierr = PetscBLASIntCast((PetscInt)PetscRealPart(lgqr_work_t),&lgqr_work);CHKERRQ(ierr); 6220 ierr = PetscMalloc1((PetscInt)PetscRealPart(lgqr_work_t),&gqr_work);CHKERRQ(ierr); 6221 /* array to store rhs and solution of triangular solver */ 6222 ierr = PetscMalloc1(max_constraints*max_constraints,&trs_rhs);CHKERRQ(ierr); 6223 /* allocating workspace for check */ 6224 if (pcbddc->dbg_flag) { 6225 ierr = PetscMalloc1(max_size_of_constraint*(max_constraints+max_size_of_constraint),&dbg_work);CHKERRQ(ierr); 6226 } 6227 } 6228 /* array to store whether a node is primal or not */ 6229 ierr = PetscBTCreate(pcis->n_B,&is_primal);CHKERRQ(ierr); 6230 ierr = PetscMalloc1(total_primal_vertices,&aux_primal_numbering_B);CHKERRQ(ierr); 6231 ierr = ISGlobalToLocalMappingApply(pcis->BtoNmap,IS_GTOLM_DROP,total_primal_vertices,pcbddc->local_primal_ref_node,&i,aux_primal_numbering_B);CHKERRQ(ierr); 6232 if (i != total_primal_vertices) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Error in boundary numbering for BDDC vertices! %D != %D\n",total_primal_vertices,i); 6233 for (i=0;i<total_primal_vertices;i++) { 6234 ierr = PetscBTSet(is_primal,aux_primal_numbering_B[i]);CHKERRQ(ierr); 6235 } 6236 ierr = PetscFree(aux_primal_numbering_B);CHKERRQ(ierr); 6237 6238 /* loop on constraints and see whether or not they need a change of basis and compute it */ 6239 for (total_counts=n_vertices;total_counts<total_counts_cc;total_counts++) { 6240 size_of_constraint = constraints_idxs_ptr[total_counts+1]-constraints_idxs_ptr[total_counts]; 6241 if (PetscBTLookup(change_basis,total_counts)) { 6242 /* get constraint info */ 6243 primal_dofs = constraints_n[total_counts]; 6244 dual_dofs = size_of_constraint-primal_dofs; 6245 6246 if (pcbddc->dbg_flag) { 6247 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraints %d: %d need a change of basis (size %d)\n",total_counts,primal_dofs,size_of_constraint);CHKERRQ(ierr); 6248 } 6249 6250 if (PetscBTLookup(qr_needed_idx,total_counts)) { /* QR */ 6251 6252 /* copy quadrature constraints for change of basis check */ 6253 if (pcbddc->dbg_flag) { 6254 ierr = PetscMemcpy(dbg_work,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6255 } 6256 /* copy temporary constraints into larger work vector (in order to store all columns of Q) */ 6257 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6258 6259 /* compute QR decomposition of constraints */ 6260 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6261 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6262 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6263 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6264 PetscStackCallBLAS("LAPACKgeqrf",LAPACKgeqrf_(&Blas_M,&Blas_N,qr_basis,&Blas_LDA,qr_tau,qr_work,&lqr_work,&lierr)); 6265 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in GEQRF Lapack routine %d",(int)lierr); 6266 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6267 6268 /* explictly compute R^-T */ 6269 ierr = PetscMemzero(trs_rhs,primal_dofs*primal_dofs*sizeof(*trs_rhs));CHKERRQ(ierr); 6270 for (j=0;j<primal_dofs;j++) trs_rhs[j*(primal_dofs+1)] = 1.0; 6271 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6272 ierr = PetscBLASIntCast(primal_dofs,&Blas_NRHS);CHKERRQ(ierr); 6273 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6274 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6275 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6276 PetscStackCallBLAS("LAPACKtrtrs",LAPACKtrtrs_("U","T","N",&Blas_N,&Blas_NRHS,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&lierr)); 6277 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in TRTRS Lapack routine %d",(int)lierr); 6278 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6279 6280 /* explicitly compute all columns of Q (Q = [Q1 | Q2] ) overwriting QR factorization in qr_basis */ 6281 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6282 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6283 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6284 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6285 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6286 PetscStackCallBLAS("LAPACKorgqr",LAPACKorgqr_(&Blas_M,&Blas_N,&Blas_K,qr_basis,&Blas_LDA,qr_tau,gqr_work,&lgqr_work,&lierr)); 6287 if (lierr) SETERRQ1(PETSC_COMM_SELF,PETSC_ERR_LIB,"Error in ORGQR/UNGQR Lapack routine %d",(int)lierr); 6288 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6289 6290 /* first primal_dofs columns of Q need to be re-scaled in order to be unitary w.r.t constraints 6291 i.e. C_{pxn}*Q_{nxn} should be equal to [I_pxp | 0_pxd] (see check below) 6292 where n=size_of_constraint, p=primal_dofs, d=dual_dofs (n=p+d), I and 0 identity and null matrix resp. */ 6293 ierr = PetscBLASIntCast(size_of_constraint,&Blas_M);CHKERRQ(ierr); 6294 ierr = PetscBLASIntCast(primal_dofs,&Blas_N);CHKERRQ(ierr); 6295 ierr = PetscBLASIntCast(primal_dofs,&Blas_K);CHKERRQ(ierr); 6296 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6297 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDB);CHKERRQ(ierr); 6298 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDC);CHKERRQ(ierr); 6299 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6300 PetscStackCallBLAS("BLASgemm",BLASgemm_("N","N",&Blas_M,&Blas_N,&Blas_K,&one,qr_basis,&Blas_LDA,trs_rhs,&Blas_LDB,&zero,constraints_data+constraints_data_ptr[total_counts],&Blas_LDC)); 6301 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6302 ierr = PetscMemcpy(qr_basis,&constraints_data[constraints_data_ptr[total_counts]],size_of_constraint*primal_dofs*sizeof(PetscScalar));CHKERRQ(ierr); 6303 6304 /* insert values in change of basis matrix respecting global ordering of new primal dofs */ 6305 start_rows = &constraints_idxs[constraints_idxs_ptr[total_counts]]; 6306 /* insert cols for primal dofs */ 6307 for (j=0;j<primal_dofs;j++) { 6308 start_vals = &qr_basis[j*size_of_constraint]; 6309 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6310 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6311 } 6312 /* insert cols for dual dofs */ 6313 for (j=0,k=0;j<dual_dofs;k++) { 6314 if (!PetscBTLookup(is_primal,constraints_idxs_B[constraints_idxs_ptr[total_counts]+k])) { 6315 start_vals = &qr_basis[(primal_dofs+j)*size_of_constraint]; 6316 start_cols = &constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6317 ierr = MatSetValues(localChangeOfBasisMatrix,size_of_constraint,start_rows,1,start_cols,start_vals,INSERT_VALUES);CHKERRQ(ierr); 6318 j++; 6319 } 6320 } 6321 6322 /* check change of basis */ 6323 if (pcbddc->dbg_flag) { 6324 PetscInt ii,jj; 6325 PetscBool valid_qr=PETSC_TRUE; 6326 ierr = PetscBLASIntCast(primal_dofs,&Blas_M);CHKERRQ(ierr); 6327 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6328 ierr = PetscBLASIntCast(size_of_constraint,&Blas_K);CHKERRQ(ierr); 6329 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDA);CHKERRQ(ierr); 6330 ierr = PetscBLASIntCast(size_of_constraint,&Blas_LDB);CHKERRQ(ierr); 6331 ierr = PetscBLASIntCast(primal_dofs,&Blas_LDC);CHKERRQ(ierr); 6332 ierr = PetscFPTrapPush(PETSC_FP_TRAP_OFF);CHKERRQ(ierr); 6333 PetscStackCallBLAS("BLASgemm",BLASgemm_("T","N",&Blas_M,&Blas_N,&Blas_K,&one,dbg_work,&Blas_LDA,qr_basis,&Blas_LDB,&zero,&dbg_work[size_of_constraint*primal_dofs],&Blas_LDC)); 6334 ierr = PetscFPTrapPop();CHKERRQ(ierr); 6335 for (jj=0;jj<size_of_constraint;jj++) { 6336 for (ii=0;ii<primal_dofs;ii++) { 6337 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) valid_qr = PETSC_FALSE; 6338 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) valid_qr = PETSC_FALSE; 6339 } 6340 } 6341 if (!valid_qr) { 6342 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> wrong change of basis!\n");CHKERRQ(ierr); 6343 for (jj=0;jj<size_of_constraint;jj++) { 6344 for (ii=0;ii<primal_dofs;ii++) { 6345 if (ii != jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]) > 1.e-12) { 6346 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not orthogonal to constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6347 } 6348 if (ii == jj && PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii]-(PetscReal)1) > 1.e-12) { 6349 PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\tQr basis function %d is not unitary w.r.t constraint %d (%1.14e)!\n",jj,ii,PetscAbsScalar(dbg_work[size_of_constraint*primal_dofs+jj*primal_dofs+ii])); 6350 } 6351 } 6352 } 6353 } else { 6354 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> right change of basis!\n");CHKERRQ(ierr); 6355 } 6356 } 6357 } else { /* simple transformation block */ 6358 PetscInt row,col; 6359 PetscScalar val,norm; 6360 6361 ierr = PetscBLASIntCast(size_of_constraint,&Blas_N);CHKERRQ(ierr); 6362 PetscStackCallBLAS("BLASdot",norm = BLASdot_(&Blas_N,constraints_data+constraints_data_ptr[total_counts],&Blas_one,constraints_data+constraints_data_ptr[total_counts],&Blas_one)); 6363 for (j=0;j<size_of_constraint;j++) { 6364 PetscInt row_B = constraints_idxs_B[constraints_idxs_ptr[total_counts]+j]; 6365 row = constraints_idxs[constraints_idxs_ptr[total_counts]+j]; 6366 if (!PetscBTLookup(is_primal,row_B)) { 6367 col = constraints_idxs[constraints_idxs_ptr[total_counts]]; 6368 ierr = MatSetValue(localChangeOfBasisMatrix,row,row,1.0,INSERT_VALUES);CHKERRQ(ierr); 6369 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,constraints_data[constraints_data_ptr[total_counts]+j]/norm,INSERT_VALUES);CHKERRQ(ierr); 6370 } else { 6371 for (k=0;k<size_of_constraint;k++) { 6372 col = constraints_idxs[constraints_idxs_ptr[total_counts]+k]; 6373 if (row != col) { 6374 val = -constraints_data[constraints_data_ptr[total_counts]+k]/constraints_data[constraints_data_ptr[total_counts]]; 6375 } else { 6376 val = constraints_data[constraints_data_ptr[total_counts]]/norm; 6377 } 6378 ierr = MatSetValue(localChangeOfBasisMatrix,row,col,val,INSERT_VALUES);CHKERRQ(ierr); 6379 } 6380 } 6381 } 6382 if (pcbddc->dbg_flag) { 6383 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"\t-> using standard change of basis\n");CHKERRQ(ierr); 6384 } 6385 } 6386 } else { 6387 if (pcbddc->dbg_flag) { 6388 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Constraint %d does not need a change of basis (size %d)\n",total_counts,size_of_constraint);CHKERRQ(ierr); 6389 } 6390 } 6391 } 6392 6393 /* free workspace */ 6394 if (qr_needed) { 6395 if (pcbddc->dbg_flag) { 6396 ierr = PetscFree(dbg_work);CHKERRQ(ierr); 6397 } 6398 ierr = PetscFree(trs_rhs);CHKERRQ(ierr); 6399 ierr = PetscFree(qr_tau);CHKERRQ(ierr); 6400 ierr = PetscFree(qr_work);CHKERRQ(ierr); 6401 ierr = PetscFree(gqr_work);CHKERRQ(ierr); 6402 ierr = PetscFree(qr_basis);CHKERRQ(ierr); 6403 } 6404 ierr = PetscBTDestroy(&is_primal);CHKERRQ(ierr); 6405 ierr = MatAssemblyBegin(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6406 ierr = MatAssemblyEnd(localChangeOfBasisMatrix,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6407 6408 /* assembling of global change of variable */ 6409 if (!pcbddc->fake_change) { 6410 Mat tmat; 6411 PetscInt bs; 6412 6413 ierr = VecGetSize(pcis->vec1_global,&global_size);CHKERRQ(ierr); 6414 ierr = VecGetLocalSize(pcis->vec1_global,&local_size);CHKERRQ(ierr); 6415 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6416 ierr = MatISSetLocalMat(tmat,localChangeOfBasisMatrix);CHKERRQ(ierr); 6417 ierr = MatCreate(PetscObjectComm((PetscObject)pc),&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6418 ierr = MatSetType(pcbddc->ChangeOfBasisMatrix,MATAIJ);CHKERRQ(ierr); 6419 ierr = MatGetBlockSize(pc->pmat,&bs);CHKERRQ(ierr); 6420 ierr = MatSetBlockSize(pcbddc->ChangeOfBasisMatrix,bs);CHKERRQ(ierr); 6421 ierr = MatSetSizes(pcbddc->ChangeOfBasisMatrix,local_size,local_size,global_size,global_size);CHKERRQ(ierr); 6422 ierr = MatISSetMPIXAIJPreallocation_Private(tmat,pcbddc->ChangeOfBasisMatrix,PETSC_TRUE);CHKERRQ(ierr); 6423 ierr = MatISGetMPIXAIJ(tmat,MAT_REUSE_MATRIX,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6424 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6425 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6426 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6427 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6428 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6429 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6430 ierr = MatDiagonalScale(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,NULL);CHKERRQ(ierr); 6431 6432 /* check */ 6433 if (pcbddc->dbg_flag) { 6434 PetscReal error; 6435 Vec x,x_change; 6436 6437 ierr = VecDuplicate(pcis->vec1_global,&x);CHKERRQ(ierr); 6438 ierr = VecDuplicate(pcis->vec1_global,&x_change);CHKERRQ(ierr); 6439 ierr = VecSetRandom(x,NULL);CHKERRQ(ierr); 6440 ierr = VecCopy(x,pcis->vec1_global);CHKERRQ(ierr); 6441 ierr = VecScatterBegin(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6442 ierr = VecScatterEnd(matis->rctx,x,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6443 ierr = MatMult(localChangeOfBasisMatrix,pcis->vec1_N,pcis->vec2_N);CHKERRQ(ierr); 6444 ierr = VecScatterBegin(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6445 ierr = VecScatterEnd(matis->rctx,pcis->vec2_N,x,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6446 ierr = MatMult(pcbddc->ChangeOfBasisMatrix,pcis->vec1_global,x_change);CHKERRQ(ierr); 6447 ierr = VecAXPY(x,-1.0,x_change);CHKERRQ(ierr); 6448 ierr = VecNorm(x,NORM_INFINITY,&error);CHKERRQ(ierr); 6449 if (error > PETSC_SMALL) { 6450 SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"Error global vs local change on N: %1.6e\n",error); 6451 } 6452 ierr = VecDestroy(&x);CHKERRQ(ierr); 6453 ierr = VecDestroy(&x_change);CHKERRQ(ierr); 6454 } 6455 /* adapt sub_schurs computed (if any) */ 6456 if (pcbddc->use_deluxe_scaling) { 6457 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 6458 6459 if (pcbddc->use_change_of_basis && pcbddc->adaptive_userdefined) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_SUP,"Cannot mix automatic change of basis, adaptive selection and user-defined constraints"); 6460 if (sub_schurs && sub_schurs->S_Ej_all) { 6461 Mat S_new,tmat; 6462 IS is_all_N,is_V_Sall = NULL; 6463 6464 ierr = ISLocalToGlobalMappingApplyIS(pcis->BtoNmap,sub_schurs->is_Ej_all,&is_all_N);CHKERRQ(ierr); 6465 ierr = MatCreateSubMatrix(localChangeOfBasisMatrix,is_all_N,is_all_N,MAT_INITIAL_MATRIX,&tmat);CHKERRQ(ierr); 6466 if (pcbddc->deluxe_zerorows) { 6467 ISLocalToGlobalMapping NtoSall; 6468 IS is_V; 6469 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddc->n_vertices,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&is_V);CHKERRQ(ierr); 6470 ierr = ISLocalToGlobalMappingCreateIS(is_all_N,&NtoSall);CHKERRQ(ierr); 6471 ierr = ISGlobalToLocalMappingApplyIS(NtoSall,IS_GTOLM_DROP,is_V,&is_V_Sall);CHKERRQ(ierr); 6472 ierr = ISLocalToGlobalMappingDestroy(&NtoSall);CHKERRQ(ierr); 6473 ierr = ISDestroy(&is_V);CHKERRQ(ierr); 6474 } 6475 ierr = ISDestroy(&is_all_N);CHKERRQ(ierr); 6476 ierr = MatPtAP(sub_schurs->S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6477 ierr = MatDestroy(&sub_schurs->S_Ej_all);CHKERRQ(ierr); 6478 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6479 if (pcbddc->deluxe_zerorows) { 6480 const PetscScalar *array; 6481 const PetscInt *idxs_V,*idxs_all; 6482 PetscInt i,n_V; 6483 6484 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6485 ierr = ISGetLocalSize(is_V_Sall,&n_V);CHKERRQ(ierr); 6486 ierr = ISGetIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6487 ierr = ISGetIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6488 ierr = VecGetArrayRead(pcis->D,&array);CHKERRQ(ierr); 6489 for (i=0;i<n_V;i++) { 6490 PetscScalar val; 6491 PetscInt idx; 6492 6493 idx = idxs_V[i]; 6494 val = array[idxs_all[idxs_V[i]]]; 6495 ierr = MatSetValue(S_new,idx,idx,val,INSERT_VALUES);CHKERRQ(ierr); 6496 } 6497 ierr = MatAssemblyBegin(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6498 ierr = MatAssemblyEnd(S_new,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6499 ierr = VecRestoreArrayRead(pcis->D,&array);CHKERRQ(ierr); 6500 ierr = ISRestoreIndices(sub_schurs->is_Ej_all,&idxs_all);CHKERRQ(ierr); 6501 ierr = ISRestoreIndices(is_V_Sall,&idxs_V);CHKERRQ(ierr); 6502 } 6503 sub_schurs->S_Ej_all = S_new; 6504 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6505 if (sub_schurs->sum_S_Ej_all) { 6506 ierr = MatPtAP(sub_schurs->sum_S_Ej_all,tmat,MAT_INITIAL_MATRIX,1.0,&S_new);CHKERRQ(ierr); 6507 ierr = MatDestroy(&sub_schurs->sum_S_Ej_all);CHKERRQ(ierr); 6508 ierr = PetscObjectReference((PetscObject)S_new);CHKERRQ(ierr); 6509 if (pcbddc->deluxe_zerorows) { 6510 ierr = MatZeroRowsColumnsIS(S_new,is_V_Sall,1.,NULL,NULL);CHKERRQ(ierr); 6511 } 6512 sub_schurs->sum_S_Ej_all = S_new; 6513 ierr = MatDestroy(&S_new);CHKERRQ(ierr); 6514 } 6515 ierr = ISDestroy(&is_V_Sall);CHKERRQ(ierr); 6516 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6517 } 6518 /* destroy any change of basis context in sub_schurs */ 6519 if (sub_schurs && sub_schurs->change) { 6520 PetscInt i; 6521 6522 for (i=0;i<sub_schurs->n_subs;i++) { 6523 ierr = KSPDestroy(&sub_schurs->change[i]);CHKERRQ(ierr); 6524 } 6525 ierr = PetscFree(sub_schurs->change);CHKERRQ(ierr); 6526 } 6527 } 6528 if (pcbddc->switch_static) { /* need to save the local change */ 6529 pcbddc->switch_static_change = localChangeOfBasisMatrix; 6530 } else { 6531 ierr = MatDestroy(&localChangeOfBasisMatrix);CHKERRQ(ierr); 6532 } 6533 /* determine if any process has changed the pressures locally */ 6534 pcbddc->change_interior = pcbddc->benign_have_null; 6535 } else { /* fake change (get back change of basis into ConstraintMatrix and info on qr) */ 6536 ierr = MatDestroy(&pcbddc->ConstraintMatrix);CHKERRQ(ierr); 6537 pcbddc->ConstraintMatrix = localChangeOfBasisMatrix; 6538 pcbddc->use_qr_single = qr_needed; 6539 } 6540 } else if (pcbddc->user_ChangeOfBasisMatrix || pcbddc->benign_saddle_point) { 6541 if (!pcbddc->benign_have_null && pcbddc->user_ChangeOfBasisMatrix) { 6542 ierr = PetscObjectReference((PetscObject)pcbddc->user_ChangeOfBasisMatrix);CHKERRQ(ierr); 6543 pcbddc->ChangeOfBasisMatrix = pcbddc->user_ChangeOfBasisMatrix; 6544 } else { 6545 Mat benign_global = NULL; 6546 if (pcbddc->benign_have_null) { 6547 Mat tmat; 6548 6549 pcbddc->change_interior = PETSC_TRUE; 6550 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 6551 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 6552 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6553 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 6554 ierr = VecReciprocal(pcis->vec1_global);CHKERRQ(ierr); 6555 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6556 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 6557 ierr = MatDuplicate(pc->pmat,MAT_DO_NOT_COPY_VALUES,&tmat);CHKERRQ(ierr); 6558 if (pcbddc->benign_change) { 6559 Mat M; 6560 6561 ierr = MatDuplicate(pcbddc->benign_change,MAT_COPY_VALUES,&M);CHKERRQ(ierr); 6562 ierr = MatDiagonalScale(M,pcis->vec1_N,NULL);CHKERRQ(ierr); 6563 ierr = MatISSetLocalMat(tmat,M);CHKERRQ(ierr); 6564 ierr = MatDestroy(&M);CHKERRQ(ierr); 6565 } else { 6566 Mat eye; 6567 PetscScalar *array; 6568 6569 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6570 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,pcis->n,pcis->n,1,NULL,&eye);CHKERRQ(ierr); 6571 for (i=0;i<pcis->n;i++) { 6572 ierr = MatSetValue(eye,i,i,array[i],INSERT_VALUES);CHKERRQ(ierr); 6573 } 6574 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 6575 ierr = MatAssemblyBegin(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6576 ierr = MatAssemblyEnd(eye,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6577 ierr = MatISSetLocalMat(tmat,eye);CHKERRQ(ierr); 6578 ierr = MatDestroy(&eye);CHKERRQ(ierr); 6579 } 6580 ierr = MatISGetMPIXAIJ(tmat,MAT_INITIAL_MATRIX,&benign_global);CHKERRQ(ierr); 6581 ierr = MatDestroy(&tmat);CHKERRQ(ierr); 6582 } 6583 if (pcbddc->user_ChangeOfBasisMatrix) { 6584 ierr = MatMatMult(pcbddc->user_ChangeOfBasisMatrix,benign_global,MAT_INITIAL_MATRIX,PETSC_DEFAULT,&pcbddc->ChangeOfBasisMatrix);CHKERRQ(ierr); 6585 ierr = MatDestroy(&benign_global);CHKERRQ(ierr); 6586 } else if (pcbddc->benign_have_null) { 6587 pcbddc->ChangeOfBasisMatrix = benign_global; 6588 } 6589 } 6590 if (pcbddc->switch_static && pcbddc->ChangeOfBasisMatrix) { /* need to save the local change */ 6591 IS is_global; 6592 const PetscInt *gidxs; 6593 6594 ierr = ISLocalToGlobalMappingGetIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6595 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcis->n,gidxs,PETSC_COPY_VALUES,&is_global);CHKERRQ(ierr); 6596 ierr = ISLocalToGlobalMappingRestoreIndices(pc->pmat->rmap->mapping,&gidxs);CHKERRQ(ierr); 6597 ierr = MatCreateSubMatrixUnsorted(pcbddc->ChangeOfBasisMatrix,is_global,is_global,&pcbddc->switch_static_change);CHKERRQ(ierr); 6598 ierr = ISDestroy(&is_global);CHKERRQ(ierr); 6599 } 6600 } 6601 if (!pcbddc->fake_change && pcbddc->ChangeOfBasisMatrix && !pcbddc->work_change) { 6602 ierr = VecDuplicate(pcis->vec1_global,&pcbddc->work_change);CHKERRQ(ierr); 6603 } 6604 6605 if (!pcbddc->fake_change) { 6606 /* add pressure dofs to set of primal nodes for numbering purposes */ 6607 for (i=0;i<pcbddc->benign_n;i++) { 6608 pcbddc->local_primal_ref_node[pcbddc->local_primal_size_cc] = pcbddc->benign_p0_lidx[i]; 6609 pcbddc->primal_indices_local_idxs[pcbddc->local_primal_size] = pcbddc->benign_p0_lidx[i]; 6610 pcbddc->local_primal_ref_mult[pcbddc->local_primal_size_cc] = 1; 6611 pcbddc->local_primal_size_cc++; 6612 pcbddc->local_primal_size++; 6613 } 6614 6615 /* check if a new primal space has been introduced (also take into account benign trick) */ 6616 pcbddc->new_primal_space_local = PETSC_TRUE; 6617 if (olocal_primal_size == pcbddc->local_primal_size) { 6618 ierr = PetscMemcmp(pcbddc->local_primal_ref_node,olocal_primal_ref_node,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6619 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6620 if (!pcbddc->new_primal_space_local) { 6621 ierr = PetscMemcmp(pcbddc->local_primal_ref_mult,olocal_primal_ref_mult,olocal_primal_size_cc*sizeof(PetscInt),&pcbddc->new_primal_space_local);CHKERRQ(ierr); 6622 pcbddc->new_primal_space_local = (PetscBool)(!pcbddc->new_primal_space_local); 6623 } 6624 } 6625 /* new_primal_space will be used for numbering of coarse dofs, so it should be the same across all subdomains */ 6626 ierr = MPIU_Allreduce(&pcbddc->new_primal_space_local,&pcbddc->new_primal_space,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 6627 } 6628 ierr = PetscFree2(olocal_primal_ref_node,olocal_primal_ref_mult);CHKERRQ(ierr); 6629 6630 /* flush dbg viewer */ 6631 if (pcbddc->dbg_flag) { 6632 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6633 } 6634 6635 /* free workspace */ 6636 ierr = PetscBTDestroy(&qr_needed_idx);CHKERRQ(ierr); 6637 ierr = PetscBTDestroy(&change_basis);CHKERRQ(ierr); 6638 if (!pcbddc->adaptive_selection) { 6639 ierr = PetscFree3(constraints_idxs_ptr,constraints_data_ptr,constraints_n);CHKERRQ(ierr); 6640 ierr = PetscFree3(constraints_data,constraints_idxs,constraints_idxs_B);CHKERRQ(ierr); 6641 } else { 6642 ierr = PetscFree5(pcbddc->adaptive_constraints_n, 6643 pcbddc->adaptive_constraints_idxs_ptr, 6644 pcbddc->adaptive_constraints_data_ptr, 6645 pcbddc->adaptive_constraints_idxs, 6646 pcbddc->adaptive_constraints_data);CHKERRQ(ierr); 6647 ierr = PetscFree(constraints_n);CHKERRQ(ierr); 6648 ierr = PetscFree(constraints_idxs_B);CHKERRQ(ierr); 6649 } 6650 PetscFunctionReturn(0); 6651 } 6652 6653 PetscErrorCode PCBDDCAnalyzeInterface(PC pc) 6654 { 6655 ISLocalToGlobalMapping map; 6656 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 6657 Mat_IS *matis = (Mat_IS*)pc->pmat->data; 6658 PetscInt i,N; 6659 PetscBool rcsr = PETSC_FALSE; 6660 PetscErrorCode ierr; 6661 6662 PetscFunctionBegin; 6663 if (pcbddc->recompute_topography) { 6664 pcbddc->graphanalyzed = PETSC_FALSE; 6665 /* Reset previously computed graph */ 6666 ierr = PCBDDCGraphReset(pcbddc->mat_graph);CHKERRQ(ierr); 6667 /* Init local Graph struct */ 6668 ierr = MatGetSize(pc->pmat,&N,NULL);CHKERRQ(ierr); 6669 ierr = MatGetLocalToGlobalMapping(pc->pmat,&map,NULL);CHKERRQ(ierr); 6670 ierr = PCBDDCGraphInit(pcbddc->mat_graph,map,N,pcbddc->graphmaxcount);CHKERRQ(ierr); 6671 6672 if (pcbddc->user_primal_vertices_local && !pcbddc->user_primal_vertices) { 6673 ierr = PCBDDCConsistencyCheckIS(pc,MPI_LOR,&pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6674 } 6675 /* Check validity of the csr graph passed in by the user */ 6676 if (pcbddc->mat_graph->nvtxs_csr && pcbddc->mat_graph->nvtxs_csr != pcbddc->mat_graph->nvtxs) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_ARG_WRONG,"Invalid size of local CSR graph! Found %d, expected %d\n",pcbddc->mat_graph->nvtxs_csr,pcbddc->mat_graph->nvtxs); 6677 6678 /* Set default CSR adjacency of local dofs if not provided by the user with PCBDDCSetLocalAdjacencyGraph */ 6679 if (!pcbddc->mat_graph->xadj && pcbddc->use_local_adj) { 6680 PetscInt *xadj,*adjncy; 6681 PetscInt nvtxs; 6682 PetscBool flg_row=PETSC_FALSE; 6683 6684 ierr = MatGetRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6685 if (flg_row) { 6686 ierr = PCBDDCSetLocalAdjacencyGraph(pc,nvtxs,xadj,adjncy,PETSC_COPY_VALUES);CHKERRQ(ierr); 6687 pcbddc->computed_rowadj = PETSC_TRUE; 6688 } 6689 ierr = MatRestoreRowIJ(matis->A,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,(const PetscInt**)&xadj,(const PetscInt**)&adjncy,&flg_row);CHKERRQ(ierr); 6690 rcsr = PETSC_TRUE; 6691 } 6692 if (pcbddc->dbg_flag) { 6693 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 6694 } 6695 6696 /* Setup of Graph */ 6697 pcbddc->mat_graph->commsizelimit = 0; /* don't use the COMM_SELF variant of the graph */ 6698 ierr = PCBDDCGraphSetUp(pcbddc->mat_graph,pcbddc->vertex_size,pcbddc->NeumannBoundariesLocal,pcbddc->DirichletBoundariesLocal,pcbddc->n_ISForDofsLocal,pcbddc->ISForDofsLocal,pcbddc->user_primal_vertices_local);CHKERRQ(ierr); 6699 6700 /* attach info on disconnected subdomains if present */ 6701 if (pcbddc->n_local_subs) { 6702 PetscInt *local_subs; 6703 6704 ierr = PetscMalloc1(N,&local_subs);CHKERRQ(ierr); 6705 for (i=0;i<pcbddc->n_local_subs;i++) { 6706 const PetscInt *idxs; 6707 PetscInt nl,j; 6708 6709 ierr = ISGetLocalSize(pcbddc->local_subs[i],&nl);CHKERRQ(ierr); 6710 ierr = ISGetIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6711 for (j=0;j<nl;j++) local_subs[idxs[j]] = i; 6712 ierr = ISRestoreIndices(pcbddc->local_subs[i],&idxs);CHKERRQ(ierr); 6713 } 6714 pcbddc->mat_graph->n_local_subs = pcbddc->n_local_subs; 6715 pcbddc->mat_graph->local_subs = local_subs; 6716 } 6717 } 6718 6719 if (!pcbddc->graphanalyzed) { 6720 /* Graph's connected components analysis */ 6721 ierr = PCBDDCGraphComputeConnectedComponents(pcbddc->mat_graph);CHKERRQ(ierr); 6722 pcbddc->graphanalyzed = PETSC_TRUE; 6723 } 6724 if (rcsr) pcbddc->mat_graph->nvtxs_csr = 0; 6725 PetscFunctionReturn(0); 6726 } 6727 6728 PetscErrorCode PCBDDCOrthonormalizeVecs(PetscInt n, Vec vecs[]) 6729 { 6730 PetscInt i,j; 6731 PetscScalar *alphas; 6732 PetscErrorCode ierr; 6733 6734 PetscFunctionBegin; 6735 ierr = PetscMalloc1(n,&alphas);CHKERRQ(ierr); 6736 for (i=0;i<n;i++) { 6737 ierr = VecNormalize(vecs[i],NULL);CHKERRQ(ierr); 6738 ierr = VecMDot(vecs[i],n-i-1,&vecs[i+1],alphas);CHKERRQ(ierr); 6739 for (j=0;j<n-i-1;j++) alphas[j] = PetscConj(-alphas[j]); 6740 ierr = VecMAXPY(vecs[j],n-i-1,alphas,vecs+i);CHKERRQ(ierr); 6741 } 6742 ierr = PetscFree(alphas);CHKERRQ(ierr); 6743 PetscFunctionReturn(0); 6744 } 6745 6746 PetscErrorCode PCBDDCMatISGetSubassemblingPattern(Mat mat, PetscInt *n_subdomains, PetscInt redprocs, IS* is_sends, PetscBool *have_void) 6747 { 6748 Mat A; 6749 PetscInt n_neighs,*neighs,*n_shared,**shared; 6750 PetscMPIInt size,rank,color; 6751 PetscInt *xadj,*adjncy; 6752 PetscInt *adjncy_wgt,*v_wgt,*ranks_send_to_idx; 6753 PetscInt im_active,active_procs,N,n,i,j,threshold = 2; 6754 PetscInt void_procs,*procs_candidates = NULL; 6755 PetscInt xadj_count,*count; 6756 PetscBool ismatis,use_vwgt=PETSC_FALSE; 6757 PetscSubcomm psubcomm; 6758 MPI_Comm subcomm; 6759 PetscErrorCode ierr; 6760 6761 PetscFunctionBegin; 6762 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 6763 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 6764 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 6765 PetscValidLogicalCollectiveInt(mat,*n_subdomains,2); 6766 PetscValidLogicalCollectiveInt(mat,redprocs,3); 6767 if (*n_subdomains <=0) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_ARG_WRONG,"Invalid number of subdomains requested %d\n",*n_subdomains); 6768 6769 if (have_void) *have_void = PETSC_FALSE; 6770 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)mat),&size);CHKERRQ(ierr); 6771 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)mat),&rank);CHKERRQ(ierr); 6772 ierr = MatISGetLocalMat(mat,&A);CHKERRQ(ierr); 6773 ierr = MatGetLocalSize(A,&n,NULL);CHKERRQ(ierr); 6774 im_active = !!n; 6775 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6776 void_procs = size - active_procs; 6777 /* get ranks of of non-active processes in mat communicator */ 6778 if (void_procs) { 6779 PetscInt ncand; 6780 6781 if (have_void) *have_void = PETSC_TRUE; 6782 ierr = PetscMalloc1(size,&procs_candidates);CHKERRQ(ierr); 6783 ierr = MPI_Allgather(&im_active,1,MPIU_INT,procs_candidates,1,MPIU_INT,PetscObjectComm((PetscObject)mat));CHKERRQ(ierr); 6784 for (i=0,ncand=0;i<size;i++) { 6785 if (!procs_candidates[i]) { 6786 procs_candidates[ncand++] = i; 6787 } 6788 } 6789 /* force n_subdomains to be not greater that the number of non-active processes */ 6790 *n_subdomains = PetscMin(void_procs,*n_subdomains); 6791 } 6792 6793 /* number of subdomains requested greater than active processes or matrix size -> just shift the matrix 6794 number of subdomains requested 1 -> send to master or first candidate in voids */ 6795 ierr = MatGetSize(mat,&N,NULL);CHKERRQ(ierr); 6796 if (active_procs < *n_subdomains || *n_subdomains == 1 || N <= *n_subdomains) { 6797 PetscInt issize,isidx,dest; 6798 if (*n_subdomains == 1) dest = 0; 6799 else dest = rank; 6800 if (im_active) { 6801 issize = 1; 6802 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6803 isidx = procs_candidates[dest]; 6804 } else { 6805 isidx = dest; 6806 } 6807 } else { 6808 issize = 0; 6809 isidx = -1; 6810 } 6811 if (*n_subdomains != 1) *n_subdomains = active_procs; 6812 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),issize,&isidx,PETSC_COPY_VALUES,is_sends);CHKERRQ(ierr); 6813 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 6814 PetscFunctionReturn(0); 6815 } 6816 ierr = PetscOptionsGetBool(NULL,NULL,"-matis_partitioning_use_vwgt",&use_vwgt,NULL);CHKERRQ(ierr); 6817 ierr = PetscOptionsGetInt(NULL,NULL,"-matis_partitioning_threshold",&threshold,NULL);CHKERRQ(ierr); 6818 threshold = PetscMax(threshold,2); 6819 6820 /* Get info on mapping */ 6821 ierr = ISLocalToGlobalMappingGetInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6822 6823 /* build local CSR graph of subdomains' connectivity */ 6824 ierr = PetscMalloc1(2,&xadj);CHKERRQ(ierr); 6825 xadj[0] = 0; 6826 xadj[1] = PetscMax(n_neighs-1,0); 6827 ierr = PetscMalloc1(xadj[1],&adjncy);CHKERRQ(ierr); 6828 ierr = PetscMalloc1(xadj[1],&adjncy_wgt);CHKERRQ(ierr); 6829 ierr = PetscCalloc1(n,&count);CHKERRQ(ierr); 6830 for (i=1;i<n_neighs;i++) 6831 for (j=0;j<n_shared[i];j++) 6832 count[shared[i][j]] += 1; 6833 6834 xadj_count = 0; 6835 for (i=1;i<n_neighs;i++) { 6836 for (j=0;j<n_shared[i];j++) { 6837 if (count[shared[i][j]] < threshold) { 6838 adjncy[xadj_count] = neighs[i]; 6839 adjncy_wgt[xadj_count] = n_shared[i]; 6840 xadj_count++; 6841 break; 6842 } 6843 } 6844 } 6845 xadj[1] = xadj_count; 6846 ierr = PetscFree(count);CHKERRQ(ierr); 6847 ierr = ISLocalToGlobalMappingRestoreInfo(mat->rmap->mapping,&n_neighs,&neighs,&n_shared,&shared);CHKERRQ(ierr); 6848 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6849 6850 ierr = PetscMalloc1(1,&ranks_send_to_idx);CHKERRQ(ierr); 6851 6852 /* Restrict work on active processes only */ 6853 ierr = PetscMPIIntCast(im_active,&color);CHKERRQ(ierr); 6854 if (void_procs) { 6855 ierr = PetscSubcommCreate(PetscObjectComm((PetscObject)mat),&psubcomm);CHKERRQ(ierr); 6856 ierr = PetscSubcommSetNumber(psubcomm,2);CHKERRQ(ierr); /* 2 groups, active process and not active processes */ 6857 ierr = PetscSubcommSetTypeGeneral(psubcomm,color,rank);CHKERRQ(ierr); 6858 subcomm = PetscSubcommChild(psubcomm); 6859 } else { 6860 psubcomm = NULL; 6861 subcomm = PetscObjectComm((PetscObject)mat); 6862 } 6863 6864 v_wgt = NULL; 6865 if (!color) { 6866 ierr = PetscFree(xadj);CHKERRQ(ierr); 6867 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6868 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6869 } else { 6870 Mat subdomain_adj; 6871 IS new_ranks,new_ranks_contig; 6872 MatPartitioning partitioner; 6873 PetscInt rstart=0,rend=0; 6874 PetscInt *is_indices,*oldranks; 6875 PetscMPIInt size; 6876 PetscBool aggregate; 6877 6878 ierr = MPI_Comm_size(subcomm,&size);CHKERRQ(ierr); 6879 if (void_procs) { 6880 PetscInt prank = rank; 6881 ierr = PetscMalloc1(size,&oldranks);CHKERRQ(ierr); 6882 ierr = MPI_Allgather(&prank,1,MPIU_INT,oldranks,1,MPIU_INT,subcomm);CHKERRQ(ierr); 6883 for (i=0;i<xadj[1];i++) { 6884 ierr = PetscFindInt(adjncy[i],size,oldranks,&adjncy[i]);CHKERRQ(ierr); 6885 } 6886 ierr = PetscSortIntWithArray(xadj[1],adjncy,adjncy_wgt);CHKERRQ(ierr); 6887 } else { 6888 oldranks = NULL; 6889 } 6890 aggregate = ((redprocs > 0 && redprocs < size) ? PETSC_TRUE : PETSC_FALSE); 6891 if (aggregate) { /* TODO: all this part could be made more efficient */ 6892 PetscInt lrows,row,ncols,*cols; 6893 PetscMPIInt nrank; 6894 PetscScalar *vals; 6895 6896 ierr = MPI_Comm_rank(subcomm,&nrank);CHKERRQ(ierr); 6897 lrows = 0; 6898 if (nrank<redprocs) { 6899 lrows = size/redprocs; 6900 if (nrank<size%redprocs) lrows++; 6901 } 6902 ierr = MatCreateAIJ(subcomm,lrows,lrows,size,size,50,NULL,50,NULL,&subdomain_adj);CHKERRQ(ierr); 6903 ierr = MatGetOwnershipRange(subdomain_adj,&rstart,&rend);CHKERRQ(ierr); 6904 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_LOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6905 ierr = MatSetOption(subdomain_adj,MAT_NEW_NONZERO_ALLOCATION_ERR,PETSC_FALSE);CHKERRQ(ierr); 6906 row = nrank; 6907 ncols = xadj[1]-xadj[0]; 6908 cols = adjncy; 6909 ierr = PetscMalloc1(ncols,&vals);CHKERRQ(ierr); 6910 for (i=0;i<ncols;i++) vals[i] = adjncy_wgt[i]; 6911 ierr = MatSetValues(subdomain_adj,1,&row,ncols,cols,vals,INSERT_VALUES);CHKERRQ(ierr); 6912 ierr = MatAssemblyBegin(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6913 ierr = MatAssemblyEnd(subdomain_adj,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 6914 ierr = PetscFree(xadj);CHKERRQ(ierr); 6915 ierr = PetscFree(adjncy);CHKERRQ(ierr); 6916 ierr = PetscFree(adjncy_wgt);CHKERRQ(ierr); 6917 ierr = PetscFree(vals);CHKERRQ(ierr); 6918 if (use_vwgt) { 6919 Vec v; 6920 const PetscScalar *array; 6921 PetscInt nl; 6922 6923 ierr = MatCreateVecs(subdomain_adj,&v,NULL);CHKERRQ(ierr); 6924 ierr = VecSetValue(v,row,(PetscScalar)n,INSERT_VALUES);CHKERRQ(ierr); 6925 ierr = VecAssemblyBegin(v);CHKERRQ(ierr); 6926 ierr = VecAssemblyEnd(v);CHKERRQ(ierr); 6927 ierr = VecGetLocalSize(v,&nl);CHKERRQ(ierr); 6928 ierr = VecGetArrayRead(v,&array);CHKERRQ(ierr); 6929 ierr = PetscMalloc1(nl,&v_wgt);CHKERRQ(ierr); 6930 for (i=0;i<nl;i++) v_wgt[i] = (PetscInt)PetscRealPart(array[i]); 6931 ierr = VecRestoreArrayRead(v,&array);CHKERRQ(ierr); 6932 ierr = VecDestroy(&v);CHKERRQ(ierr); 6933 } 6934 } else { 6935 ierr = MatCreateMPIAdj(subcomm,1,(PetscInt)size,xadj,adjncy,adjncy_wgt,&subdomain_adj);CHKERRQ(ierr); 6936 if (use_vwgt) { 6937 ierr = PetscMalloc1(1,&v_wgt);CHKERRQ(ierr); 6938 v_wgt[0] = n; 6939 } 6940 } 6941 /* ierr = MatView(subdomain_adj,0);CHKERRQ(ierr); */ 6942 6943 /* Partition */ 6944 ierr = MatPartitioningCreate(subcomm,&partitioner);CHKERRQ(ierr); 6945 ierr = MatPartitioningSetAdjacency(partitioner,subdomain_adj);CHKERRQ(ierr); 6946 if (v_wgt) { 6947 ierr = MatPartitioningSetVertexWeights(partitioner,v_wgt);CHKERRQ(ierr); 6948 } 6949 *n_subdomains = PetscMin((PetscInt)size,*n_subdomains); 6950 ierr = MatPartitioningSetNParts(partitioner,*n_subdomains);CHKERRQ(ierr); 6951 ierr = MatPartitioningSetFromOptions(partitioner);CHKERRQ(ierr); 6952 ierr = MatPartitioningApply(partitioner,&new_ranks);CHKERRQ(ierr); 6953 /* ierr = MatPartitioningView(partitioner,0);CHKERRQ(ierr); */ 6954 6955 /* renumber new_ranks to avoid "holes" in new set of processors */ 6956 ierr = ISRenumber(new_ranks,NULL,NULL,&new_ranks_contig);CHKERRQ(ierr); 6957 ierr = ISDestroy(&new_ranks);CHKERRQ(ierr); 6958 ierr = ISGetIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6959 if (!aggregate) { 6960 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6961 #if defined(PETSC_USE_DEBUG) 6962 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6963 #endif 6964 ranks_send_to_idx[0] = procs_candidates[oldranks[is_indices[0]]]; 6965 } else if (oldranks) { 6966 ranks_send_to_idx[0] = oldranks[is_indices[0]]; 6967 } else { 6968 ranks_send_to_idx[0] = is_indices[0]; 6969 } 6970 } else { 6971 PetscInt idx = 0; 6972 PetscMPIInt tag; 6973 MPI_Request *reqs; 6974 6975 ierr = PetscObjectGetNewTag((PetscObject)subdomain_adj,&tag);CHKERRQ(ierr); 6976 ierr = PetscMalloc1(rend-rstart,&reqs);CHKERRQ(ierr); 6977 for (i=rstart;i<rend;i++) { 6978 ierr = MPI_Isend(is_indices+i-rstart,1,MPIU_INT,i,tag,subcomm,&reqs[i-rstart]);CHKERRQ(ierr); 6979 } 6980 ierr = MPI_Recv(&idx,1,MPIU_INT,MPI_ANY_SOURCE,tag,subcomm,MPI_STATUS_IGNORE);CHKERRQ(ierr); 6981 ierr = MPI_Waitall(rend-rstart,reqs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 6982 ierr = PetscFree(reqs);CHKERRQ(ierr); 6983 if (procs_candidates) { /* shift the pattern on non-active candidates (if any) */ 6984 #if defined(PETSC_USE_DEBUG) 6985 if (!oldranks) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"This should not happen"); 6986 #endif 6987 ranks_send_to_idx[0] = procs_candidates[oldranks[idx]]; 6988 } else if (oldranks) { 6989 ranks_send_to_idx[0] = oldranks[idx]; 6990 } else { 6991 ranks_send_to_idx[0] = idx; 6992 } 6993 } 6994 ierr = ISRestoreIndices(new_ranks_contig,(const PetscInt**)&is_indices);CHKERRQ(ierr); 6995 /* clean up */ 6996 ierr = PetscFree(oldranks);CHKERRQ(ierr); 6997 ierr = ISDestroy(&new_ranks_contig);CHKERRQ(ierr); 6998 ierr = MatDestroy(&subdomain_adj);CHKERRQ(ierr); 6999 ierr = MatPartitioningDestroy(&partitioner);CHKERRQ(ierr); 7000 } 7001 ierr = PetscSubcommDestroy(&psubcomm);CHKERRQ(ierr); 7002 ierr = PetscFree(procs_candidates);CHKERRQ(ierr); 7003 7004 /* assemble parallel IS for sends */ 7005 i = 1; 7006 if (!color) i=0; 7007 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)mat),i,ranks_send_to_idx,PETSC_OWN_POINTER,is_sends);CHKERRQ(ierr); 7008 PetscFunctionReturn(0); 7009 } 7010 7011 typedef enum {MATDENSE_PRIVATE=0,MATAIJ_PRIVATE,MATBAIJ_PRIVATE,MATSBAIJ_PRIVATE}MatTypePrivate; 7012 7013 PetscErrorCode PCBDDCMatISSubassemble(Mat mat, IS is_sends, PetscInt n_subdomains, PetscBool restrict_comm, PetscBool restrict_full, PetscBool reuse, Mat *mat_n, PetscInt nis, IS isarray[], PetscInt nvecs, Vec nnsp_vec[]) 7014 { 7015 Mat local_mat; 7016 IS is_sends_internal; 7017 PetscInt rows,cols,new_local_rows; 7018 PetscInt i,bs,buf_size_idxs,buf_size_idxs_is,buf_size_vals,buf_size_vecs; 7019 PetscBool ismatis,isdense,newisdense,destroy_mat; 7020 ISLocalToGlobalMapping l2gmap; 7021 PetscInt* l2gmap_indices; 7022 const PetscInt* is_indices; 7023 MatType new_local_type; 7024 /* buffers */ 7025 PetscInt *ptr_idxs,*send_buffer_idxs,*recv_buffer_idxs; 7026 PetscInt *ptr_idxs_is,*send_buffer_idxs_is,*recv_buffer_idxs_is; 7027 PetscInt *recv_buffer_idxs_local; 7028 PetscScalar *ptr_vals,*send_buffer_vals,*recv_buffer_vals; 7029 PetscScalar *ptr_vecs,*send_buffer_vecs,*recv_buffer_vecs; 7030 /* MPI */ 7031 MPI_Comm comm,comm_n; 7032 PetscSubcomm subcomm; 7033 PetscMPIInt n_sends,n_recvs,commsize; 7034 PetscMPIInt *iflags,*ilengths_idxs,*ilengths_vals,*ilengths_idxs_is; 7035 PetscMPIInt *onodes,*onodes_is,*olengths_idxs,*olengths_idxs_is,*olengths_vals; 7036 PetscMPIInt len,tag_idxs,tag_idxs_is,tag_vals,tag_vecs,source_dest; 7037 MPI_Request *send_req_idxs,*send_req_idxs_is,*send_req_vals,*send_req_vecs; 7038 MPI_Request *recv_req_idxs,*recv_req_idxs_is,*recv_req_vals,*recv_req_vecs; 7039 PetscErrorCode ierr; 7040 7041 PetscFunctionBegin; 7042 PetscValidHeaderSpecific(mat,MAT_CLASSID,1); 7043 ierr = PetscObjectTypeCompare((PetscObject)mat,MATIS,&ismatis);CHKERRQ(ierr); 7044 if (!ismatis) SETERRQ1(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot use %s on a matrix object which is not of type MATIS",PETSC_FUNCTION_NAME); 7045 PetscValidLogicalCollectiveInt(mat,n_subdomains,3); 7046 PetscValidLogicalCollectiveBool(mat,restrict_comm,4); 7047 PetscValidLogicalCollectiveBool(mat,restrict_full,5); 7048 PetscValidLogicalCollectiveBool(mat,reuse,6); 7049 PetscValidLogicalCollectiveInt(mat,nis,8); 7050 PetscValidLogicalCollectiveInt(mat,nvecs,10); 7051 if (nvecs) { 7052 if (nvecs > 1) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Just 1 vector supported"); 7053 PetscValidHeaderSpecific(nnsp_vec[0],VEC_CLASSID,11); 7054 } 7055 /* further checks */ 7056 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7057 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&isdense);CHKERRQ(ierr); 7058 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Currently cannot subassemble MATIS when local matrix type is not of type SEQDENSE"); 7059 ierr = MatGetSize(local_mat,&rows,&cols);CHKERRQ(ierr); 7060 if (rows != cols) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Local MATIS matrices should be square"); 7061 if (reuse && *mat_n) { 7062 PetscInt mrows,mcols,mnrows,mncols; 7063 PetscValidHeaderSpecific(*mat_n,MAT_CLASSID,7); 7064 ierr = PetscObjectTypeCompare((PetscObject)*mat_n,MATIS,&ismatis);CHKERRQ(ierr); 7065 if (!ismatis) SETERRQ(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_SUP,"Cannot reuse a matrix which is not of type MATIS"); 7066 ierr = MatGetSize(mat,&mrows,&mcols);CHKERRQ(ierr); 7067 ierr = MatGetSize(*mat_n,&mnrows,&mncols);CHKERRQ(ierr); 7068 if (mrows != mnrows) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of rows %D != %D",mrows,mnrows); 7069 if (mcols != mncols) SETERRQ2(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Cannot reuse matrix! Wrong number of cols %D != %D",mcols,mncols); 7070 } 7071 ierr = MatGetBlockSize(local_mat,&bs);CHKERRQ(ierr); 7072 PetscValidLogicalCollectiveInt(mat,bs,0); 7073 7074 /* prepare IS for sending if not provided */ 7075 if (!is_sends) { 7076 if (!n_subdomains) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"You should specify either an IS or a target number of subdomains"); 7077 ierr = PCBDDCMatISGetSubassemblingPattern(mat,&n_subdomains,0,&is_sends_internal,NULL);CHKERRQ(ierr); 7078 } else { 7079 ierr = PetscObjectReference((PetscObject)is_sends);CHKERRQ(ierr); 7080 is_sends_internal = is_sends; 7081 } 7082 7083 /* get comm */ 7084 ierr = PetscObjectGetComm((PetscObject)mat,&comm);CHKERRQ(ierr); 7085 7086 /* compute number of sends */ 7087 ierr = ISGetLocalSize(is_sends_internal,&i);CHKERRQ(ierr); 7088 ierr = PetscMPIIntCast(i,&n_sends);CHKERRQ(ierr); 7089 7090 /* compute number of receives */ 7091 ierr = MPI_Comm_size(comm,&commsize);CHKERRQ(ierr); 7092 ierr = PetscMalloc1(commsize,&iflags);CHKERRQ(ierr); 7093 ierr = PetscMemzero(iflags,commsize*sizeof(*iflags));CHKERRQ(ierr); 7094 ierr = ISGetIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7095 for (i=0;i<n_sends;i++) iflags[is_indices[i]] = 1; 7096 ierr = PetscGatherNumberOfMessages(comm,iflags,NULL,&n_recvs);CHKERRQ(ierr); 7097 ierr = PetscFree(iflags);CHKERRQ(ierr); 7098 7099 /* restrict comm if requested */ 7100 subcomm = 0; 7101 destroy_mat = PETSC_FALSE; 7102 if (restrict_comm) { 7103 PetscMPIInt color,subcommsize; 7104 7105 color = 0; 7106 if (restrict_full) { 7107 if (!n_recvs) color = 1; /* processes not receiving anything will not partecipate in new comm (full restriction) */ 7108 } else { 7109 if (!n_recvs && n_sends) color = 1; /* just those processes that are sending but not receiving anything will not partecipate in new comm */ 7110 } 7111 ierr = MPIU_Allreduce(&color,&subcommsize,1,MPI_INT,MPI_SUM,comm);CHKERRQ(ierr); 7112 subcommsize = commsize - subcommsize; 7113 /* check if reuse has been requested */ 7114 if (reuse) { 7115 if (*mat_n) { 7116 PetscMPIInt subcommsize2; 7117 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)*mat_n),&subcommsize2);CHKERRQ(ierr); 7118 if (subcommsize != subcommsize2) SETERRQ2(PetscObjectComm((PetscObject)*mat_n),PETSC_ERR_PLIB,"Cannot reuse matrix! wrong subcomm size %d != %d",subcommsize,subcommsize2); 7119 comm_n = PetscObjectComm((PetscObject)*mat_n); 7120 } else { 7121 comm_n = PETSC_COMM_SELF; 7122 } 7123 } else { /* MAT_INITIAL_MATRIX */ 7124 PetscMPIInt rank; 7125 7126 ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr); 7127 ierr = PetscSubcommCreate(comm,&subcomm);CHKERRQ(ierr); 7128 ierr = PetscSubcommSetNumber(subcomm,2);CHKERRQ(ierr); 7129 ierr = PetscSubcommSetTypeGeneral(subcomm,color,rank);CHKERRQ(ierr); 7130 comm_n = PetscSubcommChild(subcomm); 7131 } 7132 /* flag to destroy *mat_n if not significative */ 7133 if (color) destroy_mat = PETSC_TRUE; 7134 } else { 7135 comm_n = comm; 7136 } 7137 7138 /* prepare send/receive buffers */ 7139 ierr = PetscMalloc1(commsize,&ilengths_idxs);CHKERRQ(ierr); 7140 ierr = PetscMemzero(ilengths_idxs,commsize*sizeof(*ilengths_idxs));CHKERRQ(ierr); 7141 ierr = PetscMalloc1(commsize,&ilengths_vals);CHKERRQ(ierr); 7142 ierr = PetscMemzero(ilengths_vals,commsize*sizeof(*ilengths_vals));CHKERRQ(ierr); 7143 if (nis) { 7144 ierr = PetscCalloc1(commsize,&ilengths_idxs_is);CHKERRQ(ierr); 7145 } 7146 7147 /* Get data from local matrices */ 7148 if (!isdense) SETERRQ(PetscObjectComm((PetscObject)mat),PETSC_ERR_SUP,"Subassembling of AIJ local matrices not yet implemented"); 7149 /* TODO: See below some guidelines on how to prepare the local buffers */ 7150 /* 7151 send_buffer_vals should contain the raw values of the local matrix 7152 send_buffer_idxs should contain: 7153 - MatType_PRIVATE type 7154 - PetscInt size_of_l2gmap 7155 - PetscInt global_row_indices[size_of_l2gmap] 7156 - PetscInt all_other_info_which_is_needed_to_compute_preallocation_and_set_values 7157 */ 7158 else { 7159 ierr = MatDenseGetArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7160 ierr = ISLocalToGlobalMappingGetSize(mat->rmap->mapping,&i);CHKERRQ(ierr); 7161 ierr = PetscMalloc1(i+2,&send_buffer_idxs);CHKERRQ(ierr); 7162 send_buffer_idxs[0] = (PetscInt)MATDENSE_PRIVATE; 7163 send_buffer_idxs[1] = i; 7164 ierr = ISLocalToGlobalMappingGetIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7165 ierr = PetscMemcpy(&send_buffer_idxs[2],ptr_idxs,i*sizeof(PetscInt));CHKERRQ(ierr); 7166 ierr = ISLocalToGlobalMappingRestoreIndices(mat->rmap->mapping,(const PetscInt**)&ptr_idxs);CHKERRQ(ierr); 7167 ierr = PetscMPIIntCast(i,&len);CHKERRQ(ierr); 7168 for (i=0;i<n_sends;i++) { 7169 ilengths_vals[is_indices[i]] = len*len; 7170 ilengths_idxs[is_indices[i]] = len+2; 7171 } 7172 } 7173 ierr = PetscGatherMessageLengths2(comm,n_sends,n_recvs,ilengths_idxs,ilengths_vals,&onodes,&olengths_idxs,&olengths_vals);CHKERRQ(ierr); 7174 /* additional is (if any) */ 7175 if (nis) { 7176 PetscMPIInt psum; 7177 PetscInt j; 7178 for (j=0,psum=0;j<nis;j++) { 7179 PetscInt plen; 7180 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7181 ierr = PetscMPIIntCast(plen,&len);CHKERRQ(ierr); 7182 psum += len+1; /* indices + lenght */ 7183 } 7184 ierr = PetscMalloc1(psum,&send_buffer_idxs_is);CHKERRQ(ierr); 7185 for (j=0,psum=0;j<nis;j++) { 7186 PetscInt plen; 7187 const PetscInt *is_array_idxs; 7188 ierr = ISGetLocalSize(isarray[j],&plen);CHKERRQ(ierr); 7189 send_buffer_idxs_is[psum] = plen; 7190 ierr = ISGetIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7191 ierr = PetscMemcpy(&send_buffer_idxs_is[psum+1],is_array_idxs,plen*sizeof(PetscInt));CHKERRQ(ierr); 7192 ierr = ISRestoreIndices(isarray[j],&is_array_idxs);CHKERRQ(ierr); 7193 psum += plen+1; /* indices + lenght */ 7194 } 7195 for (i=0;i<n_sends;i++) { 7196 ilengths_idxs_is[is_indices[i]] = psum; 7197 } 7198 ierr = PetscGatherMessageLengths(comm,n_sends,n_recvs,ilengths_idxs_is,&onodes_is,&olengths_idxs_is);CHKERRQ(ierr); 7199 } 7200 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7201 7202 buf_size_idxs = 0; 7203 buf_size_vals = 0; 7204 buf_size_idxs_is = 0; 7205 buf_size_vecs = 0; 7206 for (i=0;i<n_recvs;i++) { 7207 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7208 buf_size_vals += (PetscInt)olengths_vals[i]; 7209 if (nis) buf_size_idxs_is += (PetscInt)olengths_idxs_is[i]; 7210 if (nvecs) buf_size_vecs += (PetscInt)olengths_idxs[i]; 7211 } 7212 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs);CHKERRQ(ierr); 7213 ierr = PetscMalloc1(buf_size_vals,&recv_buffer_vals);CHKERRQ(ierr); 7214 ierr = PetscMalloc1(buf_size_idxs_is,&recv_buffer_idxs_is);CHKERRQ(ierr); 7215 ierr = PetscMalloc1(buf_size_vecs,&recv_buffer_vecs);CHKERRQ(ierr); 7216 7217 /* get new tags for clean communications */ 7218 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs);CHKERRQ(ierr); 7219 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vals);CHKERRQ(ierr); 7220 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_idxs_is);CHKERRQ(ierr); 7221 ierr = PetscObjectGetNewTag((PetscObject)mat,&tag_vecs);CHKERRQ(ierr); 7222 7223 /* allocate for requests */ 7224 ierr = PetscMalloc1(n_sends,&send_req_idxs);CHKERRQ(ierr); 7225 ierr = PetscMalloc1(n_sends,&send_req_vals);CHKERRQ(ierr); 7226 ierr = PetscMalloc1(n_sends,&send_req_idxs_is);CHKERRQ(ierr); 7227 ierr = PetscMalloc1(n_sends,&send_req_vecs);CHKERRQ(ierr); 7228 ierr = PetscMalloc1(n_recvs,&recv_req_idxs);CHKERRQ(ierr); 7229 ierr = PetscMalloc1(n_recvs,&recv_req_vals);CHKERRQ(ierr); 7230 ierr = PetscMalloc1(n_recvs,&recv_req_idxs_is);CHKERRQ(ierr); 7231 ierr = PetscMalloc1(n_recvs,&recv_req_vecs);CHKERRQ(ierr); 7232 7233 /* communications */ 7234 ptr_idxs = recv_buffer_idxs; 7235 ptr_vals = recv_buffer_vals; 7236 ptr_idxs_is = recv_buffer_idxs_is; 7237 ptr_vecs = recv_buffer_vecs; 7238 for (i=0;i<n_recvs;i++) { 7239 source_dest = onodes[i]; 7240 ierr = MPI_Irecv(ptr_idxs,olengths_idxs[i],MPIU_INT,source_dest,tag_idxs,comm,&recv_req_idxs[i]);CHKERRQ(ierr); 7241 ierr = MPI_Irecv(ptr_vals,olengths_vals[i],MPIU_SCALAR,source_dest,tag_vals,comm,&recv_req_vals[i]);CHKERRQ(ierr); 7242 ptr_idxs += olengths_idxs[i]; 7243 ptr_vals += olengths_vals[i]; 7244 if (nis) { 7245 source_dest = onodes_is[i]; 7246 ierr = MPI_Irecv(ptr_idxs_is,olengths_idxs_is[i],MPIU_INT,source_dest,tag_idxs_is,comm,&recv_req_idxs_is[i]);CHKERRQ(ierr); 7247 ptr_idxs_is += olengths_idxs_is[i]; 7248 } 7249 if (nvecs) { 7250 source_dest = onodes[i]; 7251 ierr = MPI_Irecv(ptr_vecs,olengths_idxs[i]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&recv_req_vecs[i]);CHKERRQ(ierr); 7252 ptr_vecs += olengths_idxs[i]-2; 7253 } 7254 } 7255 for (i=0;i<n_sends;i++) { 7256 ierr = PetscMPIIntCast(is_indices[i],&source_dest);CHKERRQ(ierr); 7257 ierr = MPI_Isend(send_buffer_idxs,ilengths_idxs[source_dest],MPIU_INT,source_dest,tag_idxs,comm,&send_req_idxs[i]);CHKERRQ(ierr); 7258 ierr = MPI_Isend(send_buffer_vals,ilengths_vals[source_dest],MPIU_SCALAR,source_dest,tag_vals,comm,&send_req_vals[i]);CHKERRQ(ierr); 7259 if (nis) { 7260 ierr = MPI_Isend(send_buffer_idxs_is,ilengths_idxs_is[source_dest],MPIU_INT,source_dest,tag_idxs_is,comm,&send_req_idxs_is[i]);CHKERRQ(ierr); 7261 } 7262 if (nvecs) { 7263 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7264 ierr = MPI_Isend(send_buffer_vecs,ilengths_idxs[source_dest]-2,MPIU_SCALAR,source_dest,tag_vecs,comm,&send_req_vecs[i]);CHKERRQ(ierr); 7265 } 7266 } 7267 ierr = ISRestoreIndices(is_sends_internal,&is_indices);CHKERRQ(ierr); 7268 ierr = ISDestroy(&is_sends_internal);CHKERRQ(ierr); 7269 7270 /* assemble new l2g map */ 7271 ierr = MPI_Waitall(n_recvs,recv_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7272 ptr_idxs = recv_buffer_idxs; 7273 new_local_rows = 0; 7274 for (i=0;i<n_recvs;i++) { 7275 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7276 ptr_idxs += olengths_idxs[i]; 7277 } 7278 ierr = PetscMalloc1(new_local_rows,&l2gmap_indices);CHKERRQ(ierr); 7279 ptr_idxs = recv_buffer_idxs; 7280 new_local_rows = 0; 7281 for (i=0;i<n_recvs;i++) { 7282 ierr = PetscMemcpy(&l2gmap_indices[new_local_rows],ptr_idxs+2,(*(ptr_idxs+1))*sizeof(PetscInt));CHKERRQ(ierr); 7283 new_local_rows += *(ptr_idxs+1); /* second element is the local size of the l2gmap */ 7284 ptr_idxs += olengths_idxs[i]; 7285 } 7286 ierr = PetscSortRemoveDupsInt(&new_local_rows,l2gmap_indices);CHKERRQ(ierr); 7287 ierr = ISLocalToGlobalMappingCreate(comm_n,1,new_local_rows,l2gmap_indices,PETSC_COPY_VALUES,&l2gmap);CHKERRQ(ierr); 7288 ierr = PetscFree(l2gmap_indices);CHKERRQ(ierr); 7289 7290 /* infer new local matrix type from received local matrices type */ 7291 /* currently if all local matrices are of type X, then the resulting matrix will be of type X, except for the dense case */ 7292 /* it also assumes that if the block size is set, than it is the same among all local matrices (see checks at the beginning of the function) */ 7293 if (n_recvs) { 7294 MatTypePrivate new_local_type_private = (MatTypePrivate)send_buffer_idxs[0]; 7295 ptr_idxs = recv_buffer_idxs; 7296 for (i=0;i<n_recvs;i++) { 7297 if ((PetscInt)new_local_type_private != *ptr_idxs) { 7298 new_local_type_private = MATAIJ_PRIVATE; 7299 break; 7300 } 7301 ptr_idxs += olengths_idxs[i]; 7302 } 7303 switch (new_local_type_private) { 7304 case MATDENSE_PRIVATE: 7305 new_local_type = MATSEQAIJ; 7306 bs = 1; 7307 break; 7308 case MATAIJ_PRIVATE: 7309 new_local_type = MATSEQAIJ; 7310 bs = 1; 7311 break; 7312 case MATBAIJ_PRIVATE: 7313 new_local_type = MATSEQBAIJ; 7314 break; 7315 case MATSBAIJ_PRIVATE: 7316 new_local_type = MATSEQSBAIJ; 7317 break; 7318 default: 7319 SETERRQ2(comm,PETSC_ERR_SUP,"Unsupported private type %d in %s",new_local_type_private,PETSC_FUNCTION_NAME); 7320 break; 7321 } 7322 } else { /* by default, new_local_type is seqaij */ 7323 new_local_type = MATSEQAIJ; 7324 bs = 1; 7325 } 7326 7327 /* create MATIS object if needed */ 7328 if (!reuse) { 7329 ierr = MatGetSize(mat,&rows,&cols);CHKERRQ(ierr); 7330 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7331 } else { 7332 /* it also destroys the local matrices */ 7333 if (*mat_n) { 7334 ierr = MatSetLocalToGlobalMapping(*mat_n,l2gmap,l2gmap);CHKERRQ(ierr); 7335 } else { /* this is a fake object */ 7336 ierr = MatCreateIS(comm_n,bs,PETSC_DECIDE,PETSC_DECIDE,rows,cols,l2gmap,NULL,mat_n);CHKERRQ(ierr); 7337 } 7338 } 7339 ierr = MatISGetLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7340 ierr = MatSetType(local_mat,new_local_type);CHKERRQ(ierr); 7341 7342 ierr = MPI_Waitall(n_recvs,recv_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7343 7344 /* Global to local map of received indices */ 7345 ierr = PetscMalloc1(buf_size_idxs,&recv_buffer_idxs_local);CHKERRQ(ierr); /* needed for values insertion */ 7346 ierr = ISGlobalToLocalMappingApply(l2gmap,IS_GTOLM_MASK,buf_size_idxs,recv_buffer_idxs,&i,recv_buffer_idxs_local);CHKERRQ(ierr); 7347 ierr = ISLocalToGlobalMappingDestroy(&l2gmap);CHKERRQ(ierr); 7348 7349 /* restore attributes -> type of incoming data and its size */ 7350 buf_size_idxs = 0; 7351 for (i=0;i<n_recvs;i++) { 7352 recv_buffer_idxs_local[buf_size_idxs] = recv_buffer_idxs[buf_size_idxs]; 7353 recv_buffer_idxs_local[buf_size_idxs+1] = recv_buffer_idxs[buf_size_idxs+1]; 7354 buf_size_idxs += (PetscInt)olengths_idxs[i]; 7355 } 7356 ierr = PetscFree(recv_buffer_idxs);CHKERRQ(ierr); 7357 7358 /* set preallocation */ 7359 ierr = PetscObjectTypeCompare((PetscObject)local_mat,MATSEQDENSE,&newisdense);CHKERRQ(ierr); 7360 if (!newisdense) { 7361 PetscInt *new_local_nnz=0; 7362 7363 ptr_idxs = recv_buffer_idxs_local; 7364 if (n_recvs) { 7365 ierr = PetscCalloc1(new_local_rows,&new_local_nnz);CHKERRQ(ierr); 7366 } 7367 for (i=0;i<n_recvs;i++) { 7368 PetscInt j; 7369 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* preallocation provided for dense case only */ 7370 for (j=0;j<*(ptr_idxs+1);j++) { 7371 new_local_nnz[*(ptr_idxs+2+j)] += *(ptr_idxs+1); 7372 } 7373 } else { 7374 /* TODO */ 7375 } 7376 ptr_idxs += olengths_idxs[i]; 7377 } 7378 if (new_local_nnz) { 7379 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMin(new_local_nnz[i],new_local_rows); 7380 ierr = MatSeqAIJSetPreallocation(local_mat,0,new_local_nnz);CHKERRQ(ierr); 7381 for (i=0;i<new_local_rows;i++) new_local_nnz[i] /= bs; 7382 ierr = MatSeqBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7383 for (i=0;i<new_local_rows;i++) new_local_nnz[i] = PetscMax(new_local_nnz[i]-i,0); 7384 ierr = MatSeqSBAIJSetPreallocation(local_mat,bs,0,new_local_nnz);CHKERRQ(ierr); 7385 } else { 7386 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7387 } 7388 ierr = PetscFree(new_local_nnz);CHKERRQ(ierr); 7389 } else { 7390 ierr = MatSetUp(local_mat);CHKERRQ(ierr); 7391 } 7392 7393 /* set values */ 7394 ptr_vals = recv_buffer_vals; 7395 ptr_idxs = recv_buffer_idxs_local; 7396 for (i=0;i<n_recvs;i++) { 7397 if (*ptr_idxs == (PetscInt)MATDENSE_PRIVATE) { /* values insertion provided for dense case only */ 7398 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_FALSE);CHKERRQ(ierr); 7399 ierr = MatSetValues(local_mat,*(ptr_idxs+1),ptr_idxs+2,*(ptr_idxs+1),ptr_idxs+2,ptr_vals,ADD_VALUES);CHKERRQ(ierr); 7400 ierr = MatAssemblyBegin(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7401 ierr = MatAssemblyEnd(local_mat,MAT_FLUSH_ASSEMBLY);CHKERRQ(ierr); 7402 ierr = MatSetOption(local_mat,MAT_ROW_ORIENTED,PETSC_TRUE);CHKERRQ(ierr); 7403 } else { 7404 /* TODO */ 7405 } 7406 ptr_idxs += olengths_idxs[i]; 7407 ptr_vals += olengths_vals[i]; 7408 } 7409 ierr = MatAssemblyBegin(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7410 ierr = MatAssemblyEnd(local_mat,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7411 ierr = MatISRestoreLocalMat(*mat_n,&local_mat);CHKERRQ(ierr); 7412 ierr = MatAssemblyBegin(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7413 ierr = MatAssemblyEnd(*mat_n,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7414 ierr = PetscFree(recv_buffer_vals);CHKERRQ(ierr); 7415 7416 #if 0 7417 if (!restrict_comm) { /* check */ 7418 Vec lvec,rvec; 7419 PetscReal infty_error; 7420 7421 ierr = MatCreateVecs(mat,&rvec,&lvec);CHKERRQ(ierr); 7422 ierr = VecSetRandom(rvec,NULL);CHKERRQ(ierr); 7423 ierr = MatMult(mat,rvec,lvec);CHKERRQ(ierr); 7424 ierr = VecScale(lvec,-1.0);CHKERRQ(ierr); 7425 ierr = MatMultAdd(*mat_n,rvec,lvec,lvec);CHKERRQ(ierr); 7426 ierr = VecNorm(lvec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 7427 ierr = PetscPrintf(PetscObjectComm((PetscObject)mat),"Infinity error subassembling %1.6e\n",infty_error); 7428 ierr = VecDestroy(&rvec);CHKERRQ(ierr); 7429 ierr = VecDestroy(&lvec);CHKERRQ(ierr); 7430 } 7431 #endif 7432 7433 /* assemble new additional is (if any) */ 7434 if (nis) { 7435 PetscInt **temp_idxs,*count_is,j,psum; 7436 7437 ierr = MPI_Waitall(n_recvs,recv_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7438 ierr = PetscCalloc1(nis,&count_is);CHKERRQ(ierr); 7439 ptr_idxs = recv_buffer_idxs_is; 7440 psum = 0; 7441 for (i=0;i<n_recvs;i++) { 7442 for (j=0;j<nis;j++) { 7443 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7444 count_is[j] += plen; /* increment counting of buffer for j-th IS */ 7445 psum += plen; 7446 ptr_idxs += plen+1; /* shift pointer to received data */ 7447 } 7448 } 7449 ierr = PetscMalloc1(nis,&temp_idxs);CHKERRQ(ierr); 7450 ierr = PetscMalloc1(psum,&temp_idxs[0]);CHKERRQ(ierr); 7451 for (i=1;i<nis;i++) { 7452 temp_idxs[i] = temp_idxs[i-1]+count_is[i-1]; 7453 } 7454 ierr = PetscMemzero(count_is,nis*sizeof(PetscInt));CHKERRQ(ierr); 7455 ptr_idxs = recv_buffer_idxs_is; 7456 for (i=0;i<n_recvs;i++) { 7457 for (j=0;j<nis;j++) { 7458 PetscInt plen = *(ptr_idxs); /* first element is the local size of IS's indices */ 7459 ierr = PetscMemcpy(&temp_idxs[j][count_is[j]],ptr_idxs+1,plen*sizeof(PetscInt));CHKERRQ(ierr); 7460 count_is[j] += plen; /* increment starting point of buffer for j-th IS */ 7461 ptr_idxs += plen+1; /* shift pointer to received data */ 7462 } 7463 } 7464 for (i=0;i<nis;i++) { 7465 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7466 ierr = PetscSortRemoveDupsInt(&count_is[i],temp_idxs[i]);CHKERRQ(ierr);CHKERRQ(ierr); 7467 ierr = ISCreateGeneral(comm_n,count_is[i],temp_idxs[i],PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7468 } 7469 ierr = PetscFree(count_is);CHKERRQ(ierr); 7470 ierr = PetscFree(temp_idxs[0]);CHKERRQ(ierr); 7471 ierr = PetscFree(temp_idxs);CHKERRQ(ierr); 7472 } 7473 /* free workspace */ 7474 ierr = PetscFree(recv_buffer_idxs_is);CHKERRQ(ierr); 7475 ierr = MPI_Waitall(n_sends,send_req_idxs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7476 ierr = PetscFree(send_buffer_idxs);CHKERRQ(ierr); 7477 ierr = MPI_Waitall(n_sends,send_req_vals,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7478 if (isdense) { 7479 ierr = MatISGetLocalMat(mat,&local_mat);CHKERRQ(ierr); 7480 ierr = MatDenseRestoreArray(local_mat,&send_buffer_vals);CHKERRQ(ierr); 7481 ierr = MatISRestoreLocalMat(mat,&local_mat);CHKERRQ(ierr); 7482 } else { 7483 /* ierr = PetscFree(send_buffer_vals);CHKERRQ(ierr); */ 7484 } 7485 if (nis) { 7486 ierr = MPI_Waitall(n_sends,send_req_idxs_is,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7487 ierr = PetscFree(send_buffer_idxs_is);CHKERRQ(ierr); 7488 } 7489 7490 if (nvecs) { 7491 ierr = MPI_Waitall(n_recvs,recv_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7492 ierr = MPI_Waitall(n_sends,send_req_vecs,MPI_STATUSES_IGNORE);CHKERRQ(ierr); 7493 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7494 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7495 ierr = VecCreate(comm_n,&nnsp_vec[0]);CHKERRQ(ierr); 7496 ierr = VecSetSizes(nnsp_vec[0],new_local_rows,PETSC_DECIDE);CHKERRQ(ierr); 7497 ierr = VecSetType(nnsp_vec[0],VECSTANDARD);CHKERRQ(ierr); 7498 /* set values */ 7499 ptr_vals = recv_buffer_vecs; 7500 ptr_idxs = recv_buffer_idxs_local; 7501 ierr = VecGetArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7502 for (i=0;i<n_recvs;i++) { 7503 PetscInt j; 7504 for (j=0;j<*(ptr_idxs+1);j++) { 7505 send_buffer_vecs[*(ptr_idxs+2+j)] += *(ptr_vals + j); 7506 } 7507 ptr_idxs += olengths_idxs[i]; 7508 ptr_vals += olengths_idxs[i]-2; 7509 } 7510 ierr = VecRestoreArray(nnsp_vec[0],&send_buffer_vecs);CHKERRQ(ierr); 7511 ierr = VecAssemblyBegin(nnsp_vec[0]);CHKERRQ(ierr); 7512 ierr = VecAssemblyEnd(nnsp_vec[0]);CHKERRQ(ierr); 7513 } 7514 7515 ierr = PetscFree(recv_buffer_vecs);CHKERRQ(ierr); 7516 ierr = PetscFree(recv_buffer_idxs_local);CHKERRQ(ierr); 7517 ierr = PetscFree(recv_req_idxs);CHKERRQ(ierr); 7518 ierr = PetscFree(recv_req_vals);CHKERRQ(ierr); 7519 ierr = PetscFree(recv_req_vecs);CHKERRQ(ierr); 7520 ierr = PetscFree(recv_req_idxs_is);CHKERRQ(ierr); 7521 ierr = PetscFree(send_req_idxs);CHKERRQ(ierr); 7522 ierr = PetscFree(send_req_vals);CHKERRQ(ierr); 7523 ierr = PetscFree(send_req_vecs);CHKERRQ(ierr); 7524 ierr = PetscFree(send_req_idxs_is);CHKERRQ(ierr); 7525 ierr = PetscFree(ilengths_vals);CHKERRQ(ierr); 7526 ierr = PetscFree(ilengths_idxs);CHKERRQ(ierr); 7527 ierr = PetscFree(olengths_vals);CHKERRQ(ierr); 7528 ierr = PetscFree(olengths_idxs);CHKERRQ(ierr); 7529 ierr = PetscFree(onodes);CHKERRQ(ierr); 7530 if (nis) { 7531 ierr = PetscFree(ilengths_idxs_is);CHKERRQ(ierr); 7532 ierr = PetscFree(olengths_idxs_is);CHKERRQ(ierr); 7533 ierr = PetscFree(onodes_is);CHKERRQ(ierr); 7534 } 7535 ierr = PetscSubcommDestroy(&subcomm);CHKERRQ(ierr); 7536 if (destroy_mat) { /* destroy mat is true only if restrict comm is true and process will not partecipate */ 7537 ierr = MatDestroy(mat_n);CHKERRQ(ierr); 7538 for (i=0;i<nis;i++) { 7539 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7540 } 7541 if (nvecs) { /* need to match VecDestroy nnsp_vec called in the other code path */ 7542 ierr = VecDestroy(&nnsp_vec[0]);CHKERRQ(ierr); 7543 } 7544 *mat_n = NULL; 7545 } 7546 PetscFunctionReturn(0); 7547 } 7548 7549 /* temporary hack into ksp private data structure */ 7550 #include <petsc/private/kspimpl.h> 7551 7552 PetscErrorCode PCBDDCSetUpCoarseSolver(PC pc,PetscScalar* coarse_submat_vals) 7553 { 7554 PC_BDDC *pcbddc = (PC_BDDC*)pc->data; 7555 PC_IS *pcis = (PC_IS*)pc->data; 7556 Mat coarse_mat,coarse_mat_is,coarse_submat_dense; 7557 Mat coarsedivudotp = NULL; 7558 Mat coarseG,t_coarse_mat_is; 7559 MatNullSpace CoarseNullSpace = NULL; 7560 ISLocalToGlobalMapping coarse_islg; 7561 IS coarse_is,*isarray; 7562 PetscInt i,im_active=-1,active_procs=-1; 7563 PetscInt nis,nisdofs,nisneu,nisvert; 7564 PC pc_temp; 7565 PCType coarse_pc_type; 7566 KSPType coarse_ksp_type; 7567 PetscBool multilevel_requested,multilevel_allowed; 7568 PetscBool coarse_reuse; 7569 PetscInt ncoarse,nedcfield; 7570 PetscBool compute_vecs = PETSC_FALSE; 7571 PetscScalar *array; 7572 MatReuse coarse_mat_reuse; 7573 PetscBool restr, full_restr, have_void; 7574 PetscMPIInt commsize; 7575 PetscErrorCode ierr; 7576 7577 PetscFunctionBegin; 7578 /* Assign global numbering to coarse dofs */ 7579 if (pcbddc->new_primal_space || pcbddc->coarse_size == -1) { /* a new primal space is present or it is the first initialization, so recompute global numbering */ 7580 PetscInt ocoarse_size; 7581 compute_vecs = PETSC_TRUE; 7582 7583 pcbddc->new_primal_space = PETSC_TRUE; 7584 ocoarse_size = pcbddc->coarse_size; 7585 ierr = PetscFree(pcbddc->global_primal_indices);CHKERRQ(ierr); 7586 ierr = PCBDDCComputePrimalNumbering(pc,&pcbddc->coarse_size,&pcbddc->global_primal_indices);CHKERRQ(ierr); 7587 /* see if we can avoid some work */ 7588 if (pcbddc->coarse_ksp) { /* coarse ksp has already been created */ 7589 /* if the coarse size is different or we are using adaptive selection, better to not reuse the coarse matrix */ 7590 if (ocoarse_size != pcbddc->coarse_size || pcbddc->adaptive_selection) { 7591 ierr = KSPReset(pcbddc->coarse_ksp);CHKERRQ(ierr); 7592 coarse_reuse = PETSC_FALSE; 7593 } else { /* we can safely reuse already computed coarse matrix */ 7594 coarse_reuse = PETSC_TRUE; 7595 } 7596 } else { /* there's no coarse ksp, so we need to create the coarse matrix too */ 7597 coarse_reuse = PETSC_FALSE; 7598 } 7599 /* reset any subassembling information */ 7600 if (!coarse_reuse || pcbddc->recompute_topography) { 7601 ierr = ISDestroy(&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7602 } 7603 } else { /* primal space is unchanged, so we can reuse coarse matrix */ 7604 coarse_reuse = PETSC_TRUE; 7605 } 7606 /* assemble coarse matrix */ 7607 if (coarse_reuse && pcbddc->coarse_ksp) { 7608 ierr = KSPGetOperators(pcbddc->coarse_ksp,&coarse_mat,NULL);CHKERRQ(ierr); 7609 ierr = PetscObjectReference((PetscObject)coarse_mat);CHKERRQ(ierr); 7610 coarse_mat_reuse = MAT_REUSE_MATRIX; 7611 } else { 7612 coarse_mat = NULL; 7613 coarse_mat_reuse = MAT_INITIAL_MATRIX; 7614 } 7615 7616 /* creates temporary l2gmap and IS for coarse indexes */ 7617 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),pcbddc->local_primal_size,pcbddc->global_primal_indices,PETSC_COPY_VALUES,&coarse_is);CHKERRQ(ierr); 7618 ierr = ISLocalToGlobalMappingCreateIS(coarse_is,&coarse_islg);CHKERRQ(ierr); 7619 7620 /* creates temporary MATIS object for coarse matrix */ 7621 ierr = MatCreateSeqDense(PETSC_COMM_SELF,pcbddc->local_primal_size,pcbddc->local_primal_size,NULL,&coarse_submat_dense);CHKERRQ(ierr); 7622 ierr = MatDenseGetArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7623 ierr = PetscMemcpy(array,coarse_submat_vals,sizeof(*coarse_submat_vals)*pcbddc->local_primal_size*pcbddc->local_primal_size);CHKERRQ(ierr); 7624 ierr = MatDenseRestoreArray(coarse_submat_dense,&array);CHKERRQ(ierr); 7625 ierr = MatCreateIS(PetscObjectComm((PetscObject)pc),1,PETSC_DECIDE,PETSC_DECIDE,pcbddc->coarse_size,pcbddc->coarse_size,coarse_islg,NULL,&t_coarse_mat_is);CHKERRQ(ierr); 7626 ierr = MatISSetLocalMat(t_coarse_mat_is,coarse_submat_dense);CHKERRQ(ierr); 7627 ierr = MatAssemblyBegin(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7628 ierr = MatAssemblyEnd(t_coarse_mat_is,MAT_FINAL_ASSEMBLY);CHKERRQ(ierr); 7629 ierr = MatDestroy(&coarse_submat_dense);CHKERRQ(ierr); 7630 7631 /* count "active" (i.e. with positive local size) and "void" processes */ 7632 im_active = !!(pcis->n); 7633 ierr = MPIU_Allreduce(&im_active,&active_procs,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7634 7635 /* determine number of processes partecipating to coarse solver and compute subassembling pattern */ 7636 /* restr : whether if we want to exclude senders (which are not receivers) from the subassembling pattern */ 7637 /* full_restr : just use the receivers from the subassembling pattern */ 7638 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)pc),&commsize);CHKERRQ(ierr); 7639 coarse_mat_is = NULL; 7640 multilevel_allowed = PETSC_FALSE; 7641 multilevel_requested = PETSC_FALSE; 7642 pcbddc->coarse_eqs_per_proc = PetscMin(PetscMax(pcbddc->coarse_size,1),pcbddc->coarse_eqs_per_proc); 7643 if (pcbddc->current_level < pcbddc->max_levels) multilevel_requested = PETSC_TRUE; 7644 if (multilevel_requested) { 7645 ncoarse = active_procs/pcbddc->coarsening_ratio; 7646 restr = PETSC_FALSE; 7647 full_restr = PETSC_FALSE; 7648 } else { 7649 ncoarse = pcbddc->coarse_size/pcbddc->coarse_eqs_per_proc; 7650 restr = PETSC_TRUE; 7651 full_restr = PETSC_TRUE; 7652 } 7653 if (!pcbddc->coarse_size || commsize == 1) multilevel_allowed = multilevel_requested = restr = full_restr = PETSC_FALSE; 7654 ncoarse = PetscMax(1,ncoarse); 7655 if (!pcbddc->coarse_subassembling) { 7656 if (pcbddc->coarsening_ratio > 1) { 7657 if (multilevel_requested) { 7658 ierr = PCBDDCMatISGetSubassemblingPattern(pc->pmat,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7659 } else { 7660 ierr = PCBDDCMatISGetSubassemblingPattern(t_coarse_mat_is,&ncoarse,pcbddc->coarse_adj_red,&pcbddc->coarse_subassembling,&have_void);CHKERRQ(ierr); 7661 } 7662 } else { 7663 PetscMPIInt rank; 7664 ierr = MPI_Comm_rank(PetscObjectComm((PetscObject)pc),&rank);CHKERRQ(ierr); 7665 have_void = (active_procs == (PetscInt)commsize) ? PETSC_FALSE : PETSC_TRUE; 7666 ierr = ISCreateStride(PetscObjectComm((PetscObject)pc),1,rank,1,&pcbddc->coarse_subassembling);CHKERRQ(ierr); 7667 } 7668 } else { /* if a subassembling pattern exists, then we can reuse the coarse ksp and compute the number of process involved */ 7669 PetscInt psum; 7670 if (pcbddc->coarse_ksp) psum = 1; 7671 else psum = 0; 7672 ierr = MPIU_Allreduce(&psum,&ncoarse,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7673 if (ncoarse < commsize) have_void = PETSC_TRUE; 7674 } 7675 /* determine if we can go multilevel */ 7676 if (multilevel_requested) { 7677 if (ncoarse > 1) multilevel_allowed = PETSC_TRUE; /* found enough processes */ 7678 else restr = full_restr = PETSC_TRUE; /* 1 subdomain, use a direct solver */ 7679 } 7680 if (multilevel_allowed && have_void) restr = PETSC_TRUE; 7681 7682 /* dump subassembling pattern */ 7683 if (pcbddc->dbg_flag && multilevel_allowed) { 7684 ierr = ISView(pcbddc->coarse_subassembling,pcbddc->dbg_viewer);CHKERRQ(ierr); 7685 } 7686 7687 /* compute dofs splitting and neumann boundaries for coarse dofs */ 7688 nedcfield = -1; 7689 if (multilevel_allowed && !coarse_reuse && (pcbddc->n_ISForDofsLocal || pcbddc->NeumannBoundariesLocal || pcbddc->nedclocal)) { /* protects from unneded computations */ 7690 PetscInt *tidxs,*tidxs2,nout,tsize,i; 7691 const PetscInt *idxs; 7692 ISLocalToGlobalMapping tmap; 7693 7694 /* create map between primal indices (in local representative ordering) and local primal numbering */ 7695 ierr = ISLocalToGlobalMappingCreate(PETSC_COMM_SELF,1,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,PETSC_COPY_VALUES,&tmap);CHKERRQ(ierr); 7696 /* allocate space for temporary storage */ 7697 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs);CHKERRQ(ierr); 7698 ierr = PetscMalloc1(pcbddc->local_primal_size,&tidxs2);CHKERRQ(ierr); 7699 /* allocate for IS array */ 7700 nisdofs = pcbddc->n_ISForDofsLocal; 7701 if (pcbddc->nedclocal) { 7702 if (pcbddc->nedfield > -1) { 7703 nedcfield = pcbddc->nedfield; 7704 } else { 7705 nedcfield = 0; 7706 if (nisdofs) SETERRQ1(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"This should not happen (%d)",nisdofs); 7707 nisdofs = 1; 7708 } 7709 } 7710 nisneu = !!pcbddc->NeumannBoundariesLocal; 7711 nisvert = 0; /* nisvert is not used */ 7712 nis = nisdofs + nisneu + nisvert; 7713 ierr = PetscMalloc1(nis,&isarray);CHKERRQ(ierr); 7714 /* dofs splitting */ 7715 for (i=0;i<nisdofs;i++) { 7716 /* ierr = ISView(pcbddc->ISForDofsLocal[i],0);CHKERRQ(ierr); */ 7717 if (nedcfield != i) { 7718 ierr = ISGetLocalSize(pcbddc->ISForDofsLocal[i],&tsize);CHKERRQ(ierr); 7719 ierr = ISGetIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7720 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7721 ierr = ISRestoreIndices(pcbddc->ISForDofsLocal[i],&idxs);CHKERRQ(ierr); 7722 } else { 7723 ierr = ISGetLocalSize(pcbddc->nedclocal,&tsize);CHKERRQ(ierr); 7724 ierr = ISGetIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7725 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7726 if (tsize != nout) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Failed when mapping coarse nedelec field! %d != %d\n",tsize,nout); 7727 ierr = ISRestoreIndices(pcbddc->nedclocal,&idxs);CHKERRQ(ierr); 7728 } 7729 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7730 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[i]);CHKERRQ(ierr); 7731 /* ierr = ISView(isarray[i],0);CHKERRQ(ierr); */ 7732 } 7733 /* neumann boundaries */ 7734 if (pcbddc->NeumannBoundariesLocal) { 7735 /* ierr = ISView(pcbddc->NeumannBoundariesLocal,0);CHKERRQ(ierr); */ 7736 ierr = ISGetLocalSize(pcbddc->NeumannBoundariesLocal,&tsize);CHKERRQ(ierr); 7737 ierr = ISGetIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7738 ierr = ISGlobalToLocalMappingApply(tmap,IS_GTOLM_DROP,tsize,idxs,&nout,tidxs);CHKERRQ(ierr); 7739 ierr = ISRestoreIndices(pcbddc->NeumannBoundariesLocal,&idxs);CHKERRQ(ierr); 7740 ierr = ISLocalToGlobalMappingApply(coarse_islg,nout,tidxs,tidxs2);CHKERRQ(ierr); 7741 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),nout,tidxs2,PETSC_COPY_VALUES,&isarray[nisdofs]);CHKERRQ(ierr); 7742 /* ierr = ISView(isarray[nisdofs],0);CHKERRQ(ierr); */ 7743 } 7744 /* free memory */ 7745 ierr = PetscFree(tidxs);CHKERRQ(ierr); 7746 ierr = PetscFree(tidxs2);CHKERRQ(ierr); 7747 ierr = ISLocalToGlobalMappingDestroy(&tmap);CHKERRQ(ierr); 7748 } else { 7749 nis = 0; 7750 nisdofs = 0; 7751 nisneu = 0; 7752 nisvert = 0; 7753 isarray = NULL; 7754 } 7755 /* destroy no longer needed map */ 7756 ierr = ISLocalToGlobalMappingDestroy(&coarse_islg);CHKERRQ(ierr); 7757 7758 /* subassemble */ 7759 if (multilevel_allowed) { 7760 Vec vp[1]; 7761 PetscInt nvecs = 0; 7762 PetscBool reuse,reuser; 7763 7764 if (coarse_mat) reuse = PETSC_TRUE; 7765 else reuse = PETSC_FALSE; 7766 ierr = MPIU_Allreduce(&reuse,&reuser,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 7767 vp[0] = NULL; 7768 if (pcbddc->benign_have_null) { /* propagate no-net-flux quadrature to coarser level */ 7769 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&vp[0]);CHKERRQ(ierr); 7770 ierr = VecSetSizes(vp[0],pcbddc->local_primal_size,PETSC_DECIDE);CHKERRQ(ierr); 7771 ierr = VecSetType(vp[0],VECSTANDARD);CHKERRQ(ierr); 7772 nvecs = 1; 7773 7774 if (pcbddc->divudotp) { 7775 Mat B,loc_divudotp; 7776 Vec v,p; 7777 IS dummy; 7778 PetscInt np; 7779 7780 ierr = MatISGetLocalMat(pcbddc->divudotp,&loc_divudotp);CHKERRQ(ierr); 7781 ierr = MatGetSize(loc_divudotp,&np,NULL);CHKERRQ(ierr); 7782 ierr = ISCreateStride(PETSC_COMM_SELF,np,0,1,&dummy);CHKERRQ(ierr); 7783 ierr = MatCreateSubMatrix(loc_divudotp,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B);CHKERRQ(ierr); 7784 ierr = MatCreateVecs(B,&v,&p);CHKERRQ(ierr); 7785 ierr = VecSet(p,1.);CHKERRQ(ierr); 7786 ierr = MatMultTranspose(B,p,v);CHKERRQ(ierr); 7787 ierr = VecDestroy(&p);CHKERRQ(ierr); 7788 ierr = MatDestroy(&B);CHKERRQ(ierr); 7789 ierr = VecGetArray(vp[0],&array);CHKERRQ(ierr); 7790 ierr = VecPlaceArray(pcbddc->vec1_P,array);CHKERRQ(ierr); 7791 ierr = VecRestoreArray(vp[0],&array);CHKERRQ(ierr); 7792 ierr = MatMultTranspose(pcbddc->coarse_phi_B,v,pcbddc->vec1_P);CHKERRQ(ierr); 7793 ierr = VecResetArray(pcbddc->vec1_P);CHKERRQ(ierr); 7794 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 7795 ierr = VecDestroy(&v);CHKERRQ(ierr); 7796 } 7797 } 7798 if (reuser) { 7799 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_TRUE,&coarse_mat,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7800 } else { 7801 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,nis,isarray,nvecs,vp);CHKERRQ(ierr); 7802 } 7803 if (vp[0]) { /* vp[0] could have been placed on a different set of processes */ 7804 PetscScalar *arraym,*arrayv; 7805 PetscInt nl; 7806 ierr = VecGetLocalSize(vp[0],&nl);CHKERRQ(ierr); 7807 ierr = MatCreateSeqDense(PETSC_COMM_SELF,1,nl,NULL,&coarsedivudotp);CHKERRQ(ierr); 7808 ierr = MatDenseGetArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7809 ierr = VecGetArray(vp[0],&arrayv);CHKERRQ(ierr); 7810 ierr = PetscMemcpy(arraym,arrayv,nl*sizeof(PetscScalar));CHKERRQ(ierr); 7811 ierr = VecRestoreArray(vp[0],&arrayv);CHKERRQ(ierr); 7812 ierr = MatDenseRestoreArray(coarsedivudotp,&arraym);CHKERRQ(ierr); 7813 ierr = VecDestroy(&vp[0]);CHKERRQ(ierr); 7814 } else { 7815 ierr = MatCreateSeqAIJ(PETSC_COMM_SELF,0,0,1,NULL,&coarsedivudotp);CHKERRQ(ierr); 7816 } 7817 } else { 7818 ierr = PCBDDCMatISSubassemble(t_coarse_mat_is,pcbddc->coarse_subassembling,0,restr,full_restr,PETSC_FALSE,&coarse_mat_is,0,NULL,0,NULL);CHKERRQ(ierr); 7819 } 7820 if (coarse_mat_is || coarse_mat) { 7821 PetscMPIInt size; 7822 ierr = MPI_Comm_size(PetscObjectComm((PetscObject)coarse_mat_is),&size);CHKERRQ(ierr); 7823 if (!multilevel_allowed) { 7824 ierr = MatISGetMPIXAIJ(coarse_mat_is,coarse_mat_reuse,&coarse_mat);CHKERRQ(ierr); 7825 } else { 7826 Mat A; 7827 7828 /* if this matrix is present, it means we are not reusing the coarse matrix */ 7829 if (coarse_mat_is) { 7830 if (coarse_mat) SETERRQ(PetscObjectComm((PetscObject)coarse_mat_is),PETSC_ERR_PLIB,"This should not happen"); 7831 ierr = PetscObjectReference((PetscObject)coarse_mat_is);CHKERRQ(ierr); 7832 coarse_mat = coarse_mat_is; 7833 } 7834 /* be sure we don't have MatSeqDENSE as local mat */ 7835 ierr = MatISGetLocalMat(coarse_mat,&A);CHKERRQ(ierr); 7836 ierr = MatConvert(A,MATSEQAIJ,MAT_INPLACE_MATRIX,&A);CHKERRQ(ierr); 7837 } 7838 } 7839 ierr = MatDestroy(&t_coarse_mat_is);CHKERRQ(ierr); 7840 ierr = MatDestroy(&coarse_mat_is);CHKERRQ(ierr); 7841 7842 /* create local to global scatters for coarse problem */ 7843 if (compute_vecs) { 7844 PetscInt lrows; 7845 ierr = VecDestroy(&pcbddc->coarse_vec);CHKERRQ(ierr); 7846 if (coarse_mat) { 7847 ierr = MatGetLocalSize(coarse_mat,&lrows,NULL);CHKERRQ(ierr); 7848 } else { 7849 lrows = 0; 7850 } 7851 ierr = VecCreate(PetscObjectComm((PetscObject)pc),&pcbddc->coarse_vec);CHKERRQ(ierr); 7852 ierr = VecSetSizes(pcbddc->coarse_vec,lrows,PETSC_DECIDE);CHKERRQ(ierr); 7853 ierr = VecSetType(pcbddc->coarse_vec,VECSTANDARD);CHKERRQ(ierr); 7854 ierr = VecScatterDestroy(&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7855 ierr = VecScatterCreate(pcbddc->vec1_P,NULL,pcbddc->coarse_vec,coarse_is,&pcbddc->coarse_loc_to_glob);CHKERRQ(ierr); 7856 } 7857 ierr = ISDestroy(&coarse_is);CHKERRQ(ierr); 7858 7859 /* set defaults for coarse KSP and PC */ 7860 if (multilevel_allowed) { 7861 coarse_ksp_type = KSPRICHARDSON; 7862 coarse_pc_type = PCBDDC; 7863 } else { 7864 coarse_ksp_type = KSPPREONLY; 7865 coarse_pc_type = PCREDUNDANT; 7866 } 7867 7868 /* print some info if requested */ 7869 if (pcbddc->dbg_flag) { 7870 if (!multilevel_allowed) { 7871 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 7872 if (multilevel_requested) { 7873 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Not enough active processes on level %d (active processes %d, coarsening ratio %d)\n",pcbddc->current_level,active_procs,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7874 } else if (pcbddc->max_levels) { 7875 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Maximum number of requested levels reached (%d)\n",pcbddc->max_levels);CHKERRQ(ierr); 7876 } 7877 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 7878 } 7879 } 7880 7881 /* communicate coarse discrete gradient */ 7882 coarseG = NULL; 7883 if (pcbddc->nedcG && multilevel_allowed) { 7884 MPI_Comm ccomm; 7885 if (coarse_mat) { 7886 ccomm = PetscObjectComm((PetscObject)coarse_mat); 7887 } else { 7888 ccomm = MPI_COMM_NULL; 7889 } 7890 ierr = MatMPIAIJRestrict(pcbddc->nedcG,ccomm,&coarseG);CHKERRQ(ierr); 7891 } 7892 7893 /* create the coarse KSP object only once with defaults */ 7894 if (coarse_mat) { 7895 PetscBool isredundant,isnn,isbddc; 7896 PetscViewer dbg_viewer = NULL; 7897 7898 if (pcbddc->dbg_flag) { 7899 dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)coarse_mat)); 7900 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 7901 } 7902 if (!pcbddc->coarse_ksp) { 7903 char prefix[256],str_level[16]; 7904 size_t len; 7905 7906 ierr = KSPCreate(PetscObjectComm((PetscObject)coarse_mat),&pcbddc->coarse_ksp);CHKERRQ(ierr); 7907 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 7908 ierr = PetscObjectIncrementTabLevel((PetscObject)pcbddc->coarse_ksp,(PetscObject)pc,1);CHKERRQ(ierr); 7909 ierr = KSPSetTolerances(pcbddc->coarse_ksp,PETSC_DEFAULT,PETSC_DEFAULT,PETSC_DEFAULT,1);CHKERRQ(ierr); 7910 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 7911 ierr = KSPSetType(pcbddc->coarse_ksp,coarse_ksp_type);CHKERRQ(ierr); 7912 ierr = KSPSetNormType(pcbddc->coarse_ksp,KSP_NORM_NONE);CHKERRQ(ierr); 7913 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7914 /* TODO is this logic correct? should check for coarse_mat type */ 7915 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7916 /* prefix */ 7917 ierr = PetscStrcpy(prefix,"");CHKERRQ(ierr); 7918 ierr = PetscStrcpy(str_level,"");CHKERRQ(ierr); 7919 if (!pcbddc->current_level) { 7920 ierr = PetscStrcpy(prefix,((PetscObject)pc)->prefix);CHKERRQ(ierr); 7921 ierr = PetscStrcat(prefix,"pc_bddc_coarse_");CHKERRQ(ierr); 7922 } else { 7923 ierr = PetscStrlen(((PetscObject)pc)->prefix,&len);CHKERRQ(ierr); 7924 if (pcbddc->current_level>1) len -= 3; /* remove "lX_" with X level number */ 7925 if (pcbddc->current_level>10) len -= 1; /* remove another char from level number */ 7926 ierr = PetscStrncpy(prefix,((PetscObject)pc)->prefix,len+1);CHKERRQ(ierr); 7927 ierr = PetscSNPrintf(str_level,sizeof(str_level),"l%d_",(int)(pcbddc->current_level));CHKERRQ(ierr); 7928 ierr = PetscStrcat(prefix,str_level);CHKERRQ(ierr); 7929 } 7930 ierr = KSPSetOptionsPrefix(pcbddc->coarse_ksp,prefix);CHKERRQ(ierr); 7931 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7932 ierr = PCBDDCSetLevel(pc_temp,pcbddc->current_level+1);CHKERRQ(ierr); 7933 ierr = PCBDDCSetCoarseningRatio(pc_temp,pcbddc->coarsening_ratio);CHKERRQ(ierr); 7934 ierr = PCBDDCSetLevels(pc_temp,pcbddc->max_levels);CHKERRQ(ierr); 7935 /* allow user customization */ 7936 ierr = KSPSetFromOptions(pcbddc->coarse_ksp);CHKERRQ(ierr); 7937 } 7938 /* propagate BDDC info to the next level (these are dummy calls if pc_temp is not of type PCBDDC) */ 7939 ierr = KSPGetPC(pcbddc->coarse_ksp,&pc_temp);CHKERRQ(ierr); 7940 if (nisdofs) { 7941 ierr = PCBDDCSetDofsSplitting(pc_temp,nisdofs,isarray);CHKERRQ(ierr); 7942 for (i=0;i<nisdofs;i++) { 7943 ierr = ISDestroy(&isarray[i]);CHKERRQ(ierr); 7944 } 7945 } 7946 if (nisneu) { 7947 ierr = PCBDDCSetNeumannBoundaries(pc_temp,isarray[nisdofs]);CHKERRQ(ierr); 7948 ierr = ISDestroy(&isarray[nisdofs]);CHKERRQ(ierr); 7949 } 7950 if (nisvert) { 7951 ierr = PCBDDCSetPrimalVerticesIS(pc_temp,isarray[nis-1]);CHKERRQ(ierr); 7952 ierr = ISDestroy(&isarray[nis-1]);CHKERRQ(ierr); 7953 } 7954 if (coarseG) { 7955 ierr = PCBDDCSetDiscreteGradient(pc_temp,coarseG,1,nedcfield,PETSC_FALSE,PETSC_TRUE);CHKERRQ(ierr); 7956 } 7957 7958 /* get some info after set from options */ 7959 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7960 /* multilevel can only be requested via -pc_bddc_levels or PCBDDCSetLevels */ 7961 if (isbddc && !multilevel_allowed) { 7962 ierr = PCSetType(pc_temp,coarse_pc_type);CHKERRQ(ierr); 7963 isbddc = PETSC_FALSE; 7964 } 7965 /* multilevel cannot be done with coarse PCs different from BDDC or NN */ 7966 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCNN,&isnn);CHKERRQ(ierr); 7967 if (multilevel_requested && multilevel_allowed && !isbddc && !isnn) { 7968 ierr = PCSetType(pc_temp,PCBDDC);CHKERRQ(ierr); 7969 isbddc = PETSC_TRUE; 7970 } 7971 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCREDUNDANT,&isredundant);CHKERRQ(ierr); 7972 if (isredundant) { 7973 KSP inner_ksp; 7974 PC inner_pc; 7975 7976 ierr = PCRedundantGetKSP(pc_temp,&inner_ksp);CHKERRQ(ierr); 7977 ierr = KSPGetPC(inner_ksp,&inner_pc);CHKERRQ(ierr); 7978 } 7979 7980 /* parameters which miss an API */ 7981 ierr = PetscObjectTypeCompare((PetscObject)pc_temp,PCBDDC,&isbddc);CHKERRQ(ierr); 7982 if (isbddc) { 7983 PC_BDDC* pcbddc_coarse = (PC_BDDC*)pc_temp->data; 7984 7985 pcbddc_coarse->detect_disconnected = PETSC_TRUE; 7986 pcbddc_coarse->coarse_eqs_per_proc = pcbddc->coarse_eqs_per_proc; 7987 pcbddc_coarse->benign_saddle_point = pcbddc->benign_have_null; 7988 if (pcbddc_coarse->benign_saddle_point) { 7989 Mat coarsedivudotp_is; 7990 ISLocalToGlobalMapping l2gmap,rl2g,cl2g; 7991 IS row,col; 7992 const PetscInt *gidxs; 7993 PetscInt n,st,M,N; 7994 7995 ierr = MatGetSize(coarsedivudotp,&n,NULL);CHKERRQ(ierr); 7996 ierr = MPI_Scan(&n,&st,1,MPIU_INT,MPI_SUM,PetscObjectComm((PetscObject)coarse_mat));CHKERRQ(ierr); 7997 st = st-n; 7998 ierr = ISCreateStride(PetscObjectComm((PetscObject)coarse_mat),1,st,1,&row);CHKERRQ(ierr); 7999 ierr = MatGetLocalToGlobalMapping(coarse_mat,&l2gmap,NULL);CHKERRQ(ierr); 8000 ierr = ISLocalToGlobalMappingGetSize(l2gmap,&n);CHKERRQ(ierr); 8001 ierr = ISLocalToGlobalMappingGetIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8002 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)coarse_mat),n,gidxs,PETSC_COPY_VALUES,&col);CHKERRQ(ierr); 8003 ierr = ISLocalToGlobalMappingRestoreIndices(l2gmap,&gidxs);CHKERRQ(ierr); 8004 ierr = ISLocalToGlobalMappingCreateIS(row,&rl2g);CHKERRQ(ierr); 8005 ierr = ISLocalToGlobalMappingCreateIS(col,&cl2g);CHKERRQ(ierr); 8006 ierr = ISGetSize(row,&M);CHKERRQ(ierr); 8007 ierr = MatGetSize(coarse_mat,&N,NULL);CHKERRQ(ierr); 8008 ierr = ISDestroy(&row);CHKERRQ(ierr); 8009 ierr = ISDestroy(&col);CHKERRQ(ierr); 8010 ierr = MatCreate(PetscObjectComm((PetscObject)coarse_mat),&coarsedivudotp_is);CHKERRQ(ierr); 8011 ierr = MatSetType(coarsedivudotp_is,MATIS);CHKERRQ(ierr); 8012 ierr = MatSetSizes(coarsedivudotp_is,PETSC_DECIDE,PETSC_DECIDE,M,N);CHKERRQ(ierr); 8013 ierr = MatSetLocalToGlobalMapping(coarsedivudotp_is,rl2g,cl2g);CHKERRQ(ierr); 8014 ierr = ISLocalToGlobalMappingDestroy(&rl2g);CHKERRQ(ierr); 8015 ierr = ISLocalToGlobalMappingDestroy(&cl2g);CHKERRQ(ierr); 8016 ierr = MatISSetLocalMat(coarsedivudotp_is,coarsedivudotp);CHKERRQ(ierr); 8017 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8018 ierr = PCBDDCSetDivergenceMat(pc_temp,coarsedivudotp_is,PETSC_FALSE,NULL);CHKERRQ(ierr); 8019 ierr = MatDestroy(&coarsedivudotp_is);CHKERRQ(ierr); 8020 pcbddc_coarse->adaptive_userdefined = PETSC_TRUE; 8021 if (pcbddc->adaptive_threshold < 1.0) pcbddc_coarse->deluxe_zerorows = PETSC_TRUE; 8022 } 8023 } 8024 8025 /* propagate symmetry info of coarse matrix */ 8026 ierr = MatSetOption(coarse_mat,MAT_STRUCTURALLY_SYMMETRIC,PETSC_TRUE);CHKERRQ(ierr); 8027 if (pc->pmat->symmetric_set) { 8028 ierr = MatSetOption(coarse_mat,MAT_SYMMETRIC,pc->pmat->symmetric);CHKERRQ(ierr); 8029 } 8030 if (pc->pmat->hermitian_set) { 8031 ierr = MatSetOption(coarse_mat,MAT_HERMITIAN,pc->pmat->hermitian);CHKERRQ(ierr); 8032 } 8033 if (pc->pmat->spd_set) { 8034 ierr = MatSetOption(coarse_mat,MAT_SPD,pc->pmat->spd);CHKERRQ(ierr); 8035 } 8036 if (pcbddc->benign_saddle_point && !pcbddc->benign_have_null) { 8037 ierr = MatSetOption(coarse_mat,MAT_SPD,PETSC_TRUE);CHKERRQ(ierr); 8038 } 8039 /* set operators */ 8040 ierr = KSPSetOperators(pcbddc->coarse_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8041 if (pcbddc->dbg_flag) { 8042 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*pcbddc->current_level);CHKERRQ(ierr); 8043 } 8044 } 8045 ierr = MatDestroy(&coarseG);CHKERRQ(ierr); 8046 ierr = PetscFree(isarray);CHKERRQ(ierr); 8047 #if 0 8048 { 8049 PetscViewer viewer; 8050 char filename[256]; 8051 sprintf(filename,"coarse_mat_level%d.m",pcbddc->current_level); 8052 ierr = PetscViewerASCIIOpen(PetscObjectComm((PetscObject)coarse_mat),filename,&viewer);CHKERRQ(ierr); 8053 ierr = PetscViewerPushFormat(viewer,PETSC_VIEWER_ASCII_MATLAB);CHKERRQ(ierr); 8054 ierr = MatView(coarse_mat,viewer);CHKERRQ(ierr); 8055 ierr = PetscViewerPopFormat(viewer);CHKERRQ(ierr); 8056 ierr = PetscViewerDestroy(&viewer);CHKERRQ(ierr); 8057 } 8058 #endif 8059 8060 if (pcbddc->coarse_ksp) { 8061 Vec crhs,csol; 8062 8063 ierr = KSPGetSolution(pcbddc->coarse_ksp,&csol);CHKERRQ(ierr); 8064 ierr = KSPGetRhs(pcbddc->coarse_ksp,&crhs);CHKERRQ(ierr); 8065 if (!csol) { 8066 ierr = MatCreateVecs(coarse_mat,&((pcbddc->coarse_ksp)->vec_sol),NULL);CHKERRQ(ierr); 8067 } 8068 if (!crhs) { 8069 ierr = MatCreateVecs(coarse_mat,NULL,&((pcbddc->coarse_ksp)->vec_rhs));CHKERRQ(ierr); 8070 } 8071 } 8072 ierr = MatDestroy(&coarsedivudotp);CHKERRQ(ierr); 8073 8074 /* compute null space for coarse solver if the benign trick has been requested */ 8075 if (pcbddc->benign_null) { 8076 8077 ierr = VecSet(pcbddc->vec1_P,0.);CHKERRQ(ierr); 8078 for (i=0;i<pcbddc->benign_n;i++) { 8079 ierr = VecSetValue(pcbddc->vec1_P,pcbddc->local_primal_size-pcbddc->benign_n+i,1.0,INSERT_VALUES);CHKERRQ(ierr); 8080 } 8081 ierr = VecAssemblyBegin(pcbddc->vec1_P);CHKERRQ(ierr); 8082 ierr = VecAssemblyEnd(pcbddc->vec1_P);CHKERRQ(ierr); 8083 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8084 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8085 if (coarse_mat) { 8086 Vec nullv; 8087 PetscScalar *array,*array2; 8088 PetscInt nl; 8089 8090 ierr = MatCreateVecs(coarse_mat,&nullv,NULL);CHKERRQ(ierr); 8091 ierr = VecGetLocalSize(nullv,&nl);CHKERRQ(ierr); 8092 ierr = VecGetArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8093 ierr = VecGetArray(nullv,&array2);CHKERRQ(ierr); 8094 ierr = PetscMemcpy(array2,array,nl*sizeof(*array));CHKERRQ(ierr); 8095 ierr = VecRestoreArray(nullv,&array2);CHKERRQ(ierr); 8096 ierr = VecRestoreArrayRead(pcbddc->coarse_vec,(const PetscScalar**)&array);CHKERRQ(ierr); 8097 ierr = VecNormalize(nullv,NULL);CHKERRQ(ierr); 8098 ierr = MatNullSpaceCreate(PetscObjectComm((PetscObject)coarse_mat),PETSC_FALSE,1,&nullv,&CoarseNullSpace);CHKERRQ(ierr); 8099 ierr = VecDestroy(&nullv);CHKERRQ(ierr); 8100 } 8101 } 8102 8103 if (pcbddc->coarse_ksp) { 8104 PetscBool ispreonly; 8105 8106 if (CoarseNullSpace) { 8107 PetscBool isnull; 8108 ierr = MatNullSpaceTest(CoarseNullSpace,coarse_mat,&isnull);CHKERRQ(ierr); 8109 if (isnull) { 8110 ierr = MatSetNullSpace(coarse_mat,CoarseNullSpace);CHKERRQ(ierr); 8111 } 8112 /* TODO: add local nullspaces (if any) */ 8113 } 8114 /* setup coarse ksp */ 8115 ierr = KSPSetUp(pcbddc->coarse_ksp);CHKERRQ(ierr); 8116 /* Check coarse problem if in debug mode or if solving with an iterative method */ 8117 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->coarse_ksp,KSPPREONLY,&ispreonly);CHKERRQ(ierr); 8118 if (pcbddc->dbg_flag || (!ispreonly && pcbddc->use_coarse_estimates) ) { 8119 KSP check_ksp; 8120 KSPType check_ksp_type; 8121 PC check_pc; 8122 Vec check_vec,coarse_vec; 8123 PetscReal abs_infty_error,infty_error,lambda_min=1.0,lambda_max=1.0; 8124 PetscInt its; 8125 PetscBool compute_eigs; 8126 PetscReal *eigs_r,*eigs_c; 8127 PetscInt neigs; 8128 const char *prefix; 8129 8130 /* Create ksp object suitable for estimation of extreme eigenvalues */ 8131 ierr = KSPCreate(PetscObjectComm((PetscObject)pcbddc->coarse_ksp),&check_ksp);CHKERRQ(ierr); 8132 ierr = KSPSetErrorIfNotConverged(pcbddc->coarse_ksp,pc->erroriffailure);CHKERRQ(ierr); 8133 ierr = KSPSetOperators(check_ksp,coarse_mat,coarse_mat);CHKERRQ(ierr); 8134 ierr = KSPSetTolerances(check_ksp,1.e-12,1.e-12,PETSC_DEFAULT,pcbddc->coarse_size);CHKERRQ(ierr); 8135 /* prevent from setup unneeded object */ 8136 ierr = KSPGetPC(check_ksp,&check_pc);CHKERRQ(ierr); 8137 ierr = PCSetType(check_pc,PCNONE);CHKERRQ(ierr); 8138 if (ispreonly) { 8139 check_ksp_type = KSPPREONLY; 8140 compute_eigs = PETSC_FALSE; 8141 } else { 8142 check_ksp_type = KSPGMRES; 8143 compute_eigs = PETSC_TRUE; 8144 } 8145 ierr = KSPSetType(check_ksp,check_ksp_type);CHKERRQ(ierr); 8146 ierr = KSPSetComputeSingularValues(check_ksp,compute_eigs);CHKERRQ(ierr); 8147 ierr = KSPSetComputeEigenvalues(check_ksp,compute_eigs);CHKERRQ(ierr); 8148 ierr = KSPGMRESSetRestart(check_ksp,pcbddc->coarse_size+1);CHKERRQ(ierr); 8149 ierr = KSPGetOptionsPrefix(pcbddc->coarse_ksp,&prefix);CHKERRQ(ierr); 8150 ierr = KSPSetOptionsPrefix(check_ksp,prefix);CHKERRQ(ierr); 8151 ierr = KSPAppendOptionsPrefix(check_ksp,"check_");CHKERRQ(ierr); 8152 ierr = KSPSetFromOptions(check_ksp);CHKERRQ(ierr); 8153 ierr = KSPSetUp(check_ksp);CHKERRQ(ierr); 8154 ierr = KSPGetPC(pcbddc->coarse_ksp,&check_pc);CHKERRQ(ierr); 8155 ierr = KSPSetPC(check_ksp,check_pc);CHKERRQ(ierr); 8156 /* create random vec */ 8157 ierr = MatCreateVecs(coarse_mat,&coarse_vec,&check_vec);CHKERRQ(ierr); 8158 ierr = VecSetRandom(check_vec,NULL);CHKERRQ(ierr); 8159 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8160 /* solve coarse problem */ 8161 ierr = KSPSolve(check_ksp,coarse_vec,coarse_vec);CHKERRQ(ierr); 8162 /* set eigenvalue estimation if preonly has not been requested */ 8163 if (compute_eigs) { 8164 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_r);CHKERRQ(ierr); 8165 ierr = PetscMalloc1(pcbddc->coarse_size+1,&eigs_c);CHKERRQ(ierr); 8166 ierr = KSPComputeEigenvalues(check_ksp,pcbddc->coarse_size+1,eigs_r,eigs_c,&neigs);CHKERRQ(ierr); 8167 if (neigs) { 8168 lambda_max = eigs_r[neigs-1]; 8169 lambda_min = eigs_r[0]; 8170 if (pcbddc->use_coarse_estimates) { 8171 if (lambda_max>=lambda_min) { /* using PETSC_SMALL since lambda_max == lambda_min is not allowed by KSPChebyshevSetEigenvalues */ 8172 ierr = KSPChebyshevSetEigenvalues(pcbddc->coarse_ksp,lambda_max+PETSC_SMALL,lambda_min);CHKERRQ(ierr); 8173 ierr = KSPRichardsonSetScale(pcbddc->coarse_ksp,2.0/(lambda_max+lambda_min));CHKERRQ(ierr); 8174 } 8175 } 8176 } 8177 } 8178 8179 /* check coarse problem residual error */ 8180 if (pcbddc->dbg_flag) { 8181 PetscViewer dbg_viewer = PETSC_VIEWER_STDOUT_(PetscObjectComm((PetscObject)pcbddc->coarse_ksp)); 8182 ierr = PetscViewerASCIIAddTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8183 ierr = VecAXPY(check_vec,-1.0,coarse_vec);CHKERRQ(ierr); 8184 ierr = VecNorm(check_vec,NORM_INFINITY,&infty_error);CHKERRQ(ierr); 8185 ierr = MatMult(coarse_mat,check_vec,coarse_vec);CHKERRQ(ierr); 8186 ierr = VecNorm(coarse_vec,NORM_INFINITY,&abs_infty_error);CHKERRQ(ierr); 8187 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem details (use estimates %d)\n",pcbddc->use_coarse_estimates);CHKERRQ(ierr); 8188 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(pcbddc->coarse_ksp),dbg_viewer);CHKERRQ(ierr); 8189 ierr = PetscObjectPrintClassNamePrefixType((PetscObject)(check_pc),dbg_viewer);CHKERRQ(ierr); 8190 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem exact infty_error : %1.6e\n",infty_error);CHKERRQ(ierr); 8191 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem residual infty_error: %1.6e\n",abs_infty_error);CHKERRQ(ierr); 8192 if (CoarseNullSpace) { 8193 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem is singular\n");CHKERRQ(ierr); 8194 } 8195 if (compute_eigs) { 8196 PetscReal lambda_max_s,lambda_min_s; 8197 KSPConvergedReason reason; 8198 ierr = KSPGetType(check_ksp,&check_ksp_type);CHKERRQ(ierr); 8199 ierr = KSPGetIterationNumber(check_ksp,&its);CHKERRQ(ierr); 8200 ierr = KSPGetConvergedReason(check_ksp,&reason);CHKERRQ(ierr); 8201 ierr = KSPComputeExtremeSingularValues(check_ksp,&lambda_max_s,&lambda_min_s);CHKERRQ(ierr); 8202 ierr = PetscViewerASCIIPrintf(dbg_viewer,"Coarse problem eigenvalues (estimated with %d iterations of %s, conv reason %d): %1.6e %1.6e (%1.6e %1.6e)\n",its,check_ksp_type,reason,lambda_min,lambda_max,lambda_min_s,lambda_max_s);CHKERRQ(ierr); 8203 for (i=0;i<neigs;i++) { 8204 ierr = PetscViewerASCIIPrintf(dbg_viewer,"%1.6e %1.6ei\n",eigs_r[i],eigs_c[i]);CHKERRQ(ierr); 8205 } 8206 } 8207 ierr = PetscViewerFlush(dbg_viewer);CHKERRQ(ierr); 8208 ierr = PetscViewerASCIISubtractTab(dbg_viewer,2*(pcbddc->current_level+1));CHKERRQ(ierr); 8209 } 8210 ierr = VecDestroy(&check_vec);CHKERRQ(ierr); 8211 ierr = VecDestroy(&coarse_vec);CHKERRQ(ierr); 8212 ierr = KSPDestroy(&check_ksp);CHKERRQ(ierr); 8213 if (compute_eigs) { 8214 ierr = PetscFree(eigs_r);CHKERRQ(ierr); 8215 ierr = PetscFree(eigs_c);CHKERRQ(ierr); 8216 } 8217 } 8218 } 8219 ierr = MatNullSpaceDestroy(&CoarseNullSpace);CHKERRQ(ierr); 8220 /* print additional info */ 8221 if (pcbddc->dbg_flag) { 8222 /* waits until all processes reaches this point */ 8223 ierr = PetscBarrier((PetscObject)pc);CHKERRQ(ierr); 8224 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Coarse solver setup completed at level %d\n",pcbddc->current_level);CHKERRQ(ierr); 8225 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8226 } 8227 8228 /* free memory */ 8229 ierr = MatDestroy(&coarse_mat);CHKERRQ(ierr); 8230 PetscFunctionReturn(0); 8231 } 8232 8233 PetscErrorCode PCBDDCComputePrimalNumbering(PC pc,PetscInt* coarse_size_n,PetscInt** local_primal_indices_n) 8234 { 8235 PC_BDDC* pcbddc = (PC_BDDC*)pc->data; 8236 PC_IS* pcis = (PC_IS*)pc->data; 8237 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8238 IS subset,subset_mult,subset_n; 8239 PetscInt local_size,coarse_size=0; 8240 PetscInt *local_primal_indices=NULL; 8241 const PetscInt *t_local_primal_indices; 8242 PetscErrorCode ierr; 8243 8244 PetscFunctionBegin; 8245 /* Compute global number of coarse dofs */ 8246 if (pcbddc->local_primal_size && !pcbddc->local_primal_ref_node) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_PLIB,"BDDC ConstraintsSetUp should be called first"); 8247 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_node,PETSC_COPY_VALUES,&subset_n);CHKERRQ(ierr); 8248 ierr = ISLocalToGlobalMappingApplyIS(pcis->mapping,subset_n,&subset);CHKERRQ(ierr); 8249 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8250 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)(pc->pmat)),pcbddc->local_primal_size_cc,pcbddc->local_primal_ref_mult,PETSC_COPY_VALUES,&subset_mult);CHKERRQ(ierr); 8251 ierr = ISRenumber(subset,subset_mult,&coarse_size,&subset_n);CHKERRQ(ierr); 8252 ierr = ISDestroy(&subset);CHKERRQ(ierr); 8253 ierr = ISDestroy(&subset_mult);CHKERRQ(ierr); 8254 ierr = ISGetLocalSize(subset_n,&local_size);CHKERRQ(ierr); 8255 if (local_size != pcbddc->local_primal_size) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"Invalid number of local primal indices computed %D != %D",local_size,pcbddc->local_primal_size); 8256 ierr = PetscMalloc1(local_size,&local_primal_indices);CHKERRQ(ierr); 8257 ierr = ISGetIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8258 ierr = PetscMemcpy(local_primal_indices,t_local_primal_indices,local_size*sizeof(PetscInt));CHKERRQ(ierr); 8259 ierr = ISRestoreIndices(subset_n,&t_local_primal_indices);CHKERRQ(ierr); 8260 ierr = ISDestroy(&subset_n);CHKERRQ(ierr); 8261 8262 /* check numbering */ 8263 if (pcbddc->dbg_flag) { 8264 PetscScalar coarsesum,*array,*array2; 8265 PetscInt i; 8266 PetscBool set_error = PETSC_FALSE,set_error_reduced = PETSC_FALSE; 8267 8268 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8269 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"--------------------------------------------------\n");CHKERRQ(ierr); 8270 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Check coarse indices\n");CHKERRQ(ierr); 8271 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8272 /* counter */ 8273 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8274 ierr = VecSet(pcis->vec1_N,1.0);CHKERRQ(ierr); 8275 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8276 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8277 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8278 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec2_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8279 ierr = VecSet(pcis->vec1_N,0.0);CHKERRQ(ierr); 8280 for (i=0;i<pcbddc->local_primal_size;i++) { 8281 ierr = VecSetValue(pcis->vec1_N,pcbddc->primal_indices_local_idxs[i],1.0,INSERT_VALUES);CHKERRQ(ierr); 8282 } 8283 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8284 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8285 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8286 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8287 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8288 ierr = VecScatterBegin(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8289 ierr = VecScatterEnd(matis->rctx,pcis->vec1_global,pcis->vec1_N,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8290 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8291 ierr = VecGetArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8292 for (i=0;i<pcis->n;i++) { 8293 if (array[i] != 0.0 && array[i] != array2[i]) { 8294 PetscInt owned = (PetscInt)PetscRealPart(array[i]),gi; 8295 PetscInt neigh = (PetscInt)PetscRealPart(array2[i]); 8296 set_error = PETSC_TRUE; 8297 ierr = ISLocalToGlobalMappingApply(pcis->mapping,1,&i,&gi);CHKERRQ(ierr); 8298 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d: local index %d (gid %d) owned by %d processes instead of %d!\n",PetscGlobalRank,i,gi,owned,neigh);CHKERRQ(ierr); 8299 } 8300 } 8301 ierr = VecRestoreArray(pcis->vec2_N,&array2);CHKERRQ(ierr); 8302 ierr = MPIU_Allreduce(&set_error,&set_error_reduced,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8303 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8304 for (i=0;i<pcis->n;i++) { 8305 if (PetscRealPart(array[i]) > 0.0) array[i] = 1.0/PetscRealPart(array[i]); 8306 } 8307 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8308 ierr = VecSet(pcis->vec1_global,0.0);CHKERRQ(ierr); 8309 ierr = VecScatterBegin(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8310 ierr = VecScatterEnd(matis->rctx,pcis->vec1_N,pcis->vec1_global,ADD_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8311 ierr = VecSum(pcis->vec1_global,&coarsesum);CHKERRQ(ierr); 8312 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Size of coarse problem is %d (%lf)\n",coarse_size,PetscRealPart(coarsesum));CHKERRQ(ierr); 8313 if (pcbddc->dbg_flag > 1 || set_error_reduced) { 8314 PetscInt *gidxs; 8315 8316 ierr = PetscMalloc1(pcbddc->local_primal_size,&gidxs);CHKERRQ(ierr); 8317 ierr = ISLocalToGlobalMappingApply(pcis->mapping,pcbddc->local_primal_size,pcbddc->primal_indices_local_idxs,gidxs);CHKERRQ(ierr); 8318 ierr = PetscViewerASCIIPrintf(pcbddc->dbg_viewer,"Distribution of local primal indices\n");CHKERRQ(ierr); 8319 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8320 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d\n",PetscGlobalRank);CHKERRQ(ierr); 8321 for (i=0;i<pcbddc->local_primal_size;i++) { 8322 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"local_primal_indices[%d]=%d (%d,%d)\n",i,local_primal_indices[i],pcbddc->primal_indices_local_idxs[i],gidxs[i]);CHKERRQ(ierr); 8323 } 8324 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8325 ierr = PetscFree(gidxs);CHKERRQ(ierr); 8326 } 8327 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8328 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8329 if (set_error_reduced) SETERRQ(PetscObjectComm((PetscObject)pc),PETSC_ERR_PLIB,"BDDC Numbering of coarse dofs failed"); 8330 } 8331 /* ierr = PetscPrintf(PetscObjectComm((PetscObject)pc),"Size of coarse problem is %d\n",coarse_size);CHKERRQ(ierr); */ 8332 /* get back data */ 8333 *coarse_size_n = coarse_size; 8334 *local_primal_indices_n = local_primal_indices; 8335 PetscFunctionReturn(0); 8336 } 8337 8338 PetscErrorCode PCBDDCGlobalToLocal(VecScatter g2l_ctx,Vec gwork, Vec lwork, IS globalis, IS* localis) 8339 { 8340 IS localis_t; 8341 PetscInt i,lsize,*idxs,n; 8342 PetscScalar *vals; 8343 PetscErrorCode ierr; 8344 8345 PetscFunctionBegin; 8346 /* get indices in local ordering exploiting local to global map */ 8347 ierr = ISGetLocalSize(globalis,&lsize);CHKERRQ(ierr); 8348 ierr = PetscMalloc1(lsize,&vals);CHKERRQ(ierr); 8349 for (i=0;i<lsize;i++) vals[i] = 1.0; 8350 ierr = ISGetIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8351 ierr = VecSet(gwork,0.0);CHKERRQ(ierr); 8352 ierr = VecSet(lwork,0.0);CHKERRQ(ierr); 8353 if (idxs) { /* multilevel guard */ 8354 ierr = VecSetOption(gwork,VEC_IGNORE_NEGATIVE_INDICES,PETSC_TRUE);CHKERRQ(ierr); 8355 ierr = VecSetValues(gwork,lsize,idxs,vals,INSERT_VALUES);CHKERRQ(ierr); 8356 } 8357 ierr = VecAssemblyBegin(gwork);CHKERRQ(ierr); 8358 ierr = ISRestoreIndices(globalis,(const PetscInt**)&idxs);CHKERRQ(ierr); 8359 ierr = PetscFree(vals);CHKERRQ(ierr); 8360 ierr = VecAssemblyEnd(gwork);CHKERRQ(ierr); 8361 /* now compute set in local ordering */ 8362 ierr = VecScatterBegin(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8363 ierr = VecScatterEnd(g2l_ctx,gwork,lwork,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8364 ierr = VecGetArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8365 ierr = VecGetSize(lwork,&n);CHKERRQ(ierr); 8366 for (i=0,lsize=0;i<n;i++) { 8367 if (PetscRealPart(vals[i]) > 0.5) { 8368 lsize++; 8369 } 8370 } 8371 ierr = PetscMalloc1(lsize,&idxs);CHKERRQ(ierr); 8372 for (i=0,lsize=0;i<n;i++) { 8373 if (PetscRealPart(vals[i]) > 0.5) { 8374 idxs[lsize++] = i; 8375 } 8376 } 8377 ierr = VecRestoreArrayRead(lwork,(const PetscScalar**)&vals);CHKERRQ(ierr); 8378 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)gwork),lsize,idxs,PETSC_OWN_POINTER,&localis_t);CHKERRQ(ierr); 8379 *localis = localis_t; 8380 PetscFunctionReturn(0); 8381 } 8382 8383 PetscErrorCode PCBDDCSetUpSubSchurs(PC pc) 8384 { 8385 PC_IS *pcis=(PC_IS*)pc->data; 8386 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8387 PCBDDCSubSchurs sub_schurs=pcbddc->sub_schurs; 8388 Mat S_j; 8389 PetscInt *used_xadj,*used_adjncy; 8390 PetscBool free_used_adj; 8391 PetscErrorCode ierr; 8392 8393 PetscFunctionBegin; 8394 /* decide the adjacency to be used for determining internal problems for local schur on subsets */ 8395 free_used_adj = PETSC_FALSE; 8396 if (pcbddc->sub_schurs_layers == -1) { 8397 used_xadj = NULL; 8398 used_adjncy = NULL; 8399 } else { 8400 if (pcbddc->sub_schurs_use_useradj && pcbddc->mat_graph->xadj) { 8401 used_xadj = pcbddc->mat_graph->xadj; 8402 used_adjncy = pcbddc->mat_graph->adjncy; 8403 } else if (pcbddc->computed_rowadj) { 8404 used_xadj = pcbddc->mat_graph->xadj; 8405 used_adjncy = pcbddc->mat_graph->adjncy; 8406 } else { 8407 PetscBool flg_row=PETSC_FALSE; 8408 const PetscInt *xadj,*adjncy; 8409 PetscInt nvtxs; 8410 8411 ierr = MatGetRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8412 if (flg_row) { 8413 ierr = PetscMalloc2(nvtxs+1,&used_xadj,xadj[nvtxs],&used_adjncy);CHKERRQ(ierr); 8414 ierr = PetscMemcpy(used_xadj,xadj,(nvtxs+1)*sizeof(*xadj));CHKERRQ(ierr); 8415 ierr = PetscMemcpy(used_adjncy,adjncy,(xadj[nvtxs])*sizeof(*adjncy));CHKERRQ(ierr); 8416 free_used_adj = PETSC_TRUE; 8417 } else { 8418 pcbddc->sub_schurs_layers = -1; 8419 used_xadj = NULL; 8420 used_adjncy = NULL; 8421 } 8422 ierr = MatRestoreRowIJ(pcbddc->local_mat,0,PETSC_TRUE,PETSC_FALSE,&nvtxs,&xadj,&adjncy,&flg_row);CHKERRQ(ierr); 8423 } 8424 } 8425 8426 /* setup sub_schurs data */ 8427 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8428 if (!sub_schurs->schur_explicit) { 8429 /* pcbddc->ksp_D up to date only if not using MatFactor with Schur complement support */ 8430 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8431 ierr = PCBDDCSubSchursSetUp(sub_schurs,NULL,S_j,PETSC_FALSE,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,NULL,pcbddc->adaptive_selection,PETSC_FALSE,PETSC_FALSE,0,NULL,NULL,NULL,NULL);CHKERRQ(ierr); 8432 } else { 8433 Mat change = NULL; 8434 Vec scaling = NULL; 8435 IS change_primal = NULL, iP; 8436 PetscInt benign_n; 8437 PetscBool reuse_solvers = (PetscBool)!pcbddc->use_change_of_basis; 8438 PetscBool isseqaij,need_change = PETSC_FALSE; 8439 PetscBool discrete_harmonic = PETSC_FALSE; 8440 8441 if (!pcbddc->use_vertices && reuse_solvers) { 8442 PetscInt n_vertices; 8443 8444 ierr = ISGetLocalSize(sub_schurs->is_vertices,&n_vertices);CHKERRQ(ierr); 8445 reuse_solvers = (PetscBool)!n_vertices; 8446 } 8447 ierr = PetscObjectTypeCompare((PetscObject)pcbddc->local_mat,MATSEQAIJ,&isseqaij);CHKERRQ(ierr); 8448 if (!isseqaij) { 8449 Mat_IS* matis = (Mat_IS*)pc->pmat->data; 8450 if (matis->A == pcbddc->local_mat) { 8451 ierr = MatDestroy(&pcbddc->local_mat);CHKERRQ(ierr); 8452 ierr = MatConvert(matis->A,MATSEQAIJ,MAT_INITIAL_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8453 } else { 8454 ierr = MatConvert(pcbddc->local_mat,MATSEQAIJ,MAT_INPLACE_MATRIX,&pcbddc->local_mat);CHKERRQ(ierr); 8455 } 8456 } 8457 if (!pcbddc->benign_change_explicit) { 8458 benign_n = pcbddc->benign_n; 8459 } else { 8460 benign_n = 0; 8461 } 8462 /* sub_schurs->change is a local object; instead, PCBDDCConstraintsSetUp and the quantities used in the test below are logically collective on pc. 8463 We need a global reduction to avoid possible deadlocks. 8464 We assume that sub_schurs->change is created once, and then reused for different solves, unless the topography has been recomputed */ 8465 if (pcbddc->adaptive_userdefined || (pcbddc->deluxe_zerorows && !pcbddc->use_change_of_basis)) { 8466 PetscBool have_loc_change = (PetscBool)(!!sub_schurs->change); 8467 ierr = MPIU_Allreduce(&have_loc_change,&need_change,1,MPIU_BOOL,MPI_LOR,PetscObjectComm((PetscObject)pc));CHKERRQ(ierr); 8468 need_change = (PetscBool)(!need_change); 8469 } 8470 /* If the user defines additional constraints, we import them here. 8471 We need to compute the change of basis according to the quadrature weights attached to pmat via MatSetNearNullSpace, and this could not be done (at the moment) without some hacking */ 8472 if (need_change) { 8473 PC_IS *pcisf; 8474 PC_BDDC *pcbddcf; 8475 PC pcf; 8476 8477 if (pcbddc->sub_schurs_rebuild) SETERRQ(PETSC_COMM_SELF,PETSC_ERR_SUP,"Cannot compute change of basis with a different graph"); 8478 ierr = PCCreate(PetscObjectComm((PetscObject)pc),&pcf);CHKERRQ(ierr); 8479 ierr = PCSetOperators(pcf,pc->mat,pc->pmat);CHKERRQ(ierr); 8480 ierr = PCSetType(pcf,PCBDDC);CHKERRQ(ierr); 8481 8482 /* hacks */ 8483 pcisf = (PC_IS*)pcf->data; 8484 pcisf->is_B_local = pcis->is_B_local; 8485 pcisf->vec1_N = pcis->vec1_N; 8486 pcisf->BtoNmap = pcis->BtoNmap; 8487 pcisf->n = pcis->n; 8488 pcisf->n_B = pcis->n_B; 8489 pcbddcf = (PC_BDDC*)pcf->data; 8490 ierr = PetscFree(pcbddcf->mat_graph);CHKERRQ(ierr); 8491 pcbddcf->mat_graph = pcbddc->mat_graph; 8492 pcbddcf->use_faces = PETSC_TRUE; 8493 pcbddcf->use_change_of_basis = PETSC_TRUE; 8494 pcbddcf->use_change_on_faces = PETSC_TRUE; 8495 pcbddcf->use_qr_single = PETSC_TRUE; 8496 pcbddcf->fake_change = PETSC_TRUE; 8497 8498 /* setup constraints so that we can get information on primal vertices and change of basis (in local numbering) */ 8499 ierr = PCBDDCConstraintsSetUp(pcf);CHKERRQ(ierr); 8500 sub_schurs->change_with_qr = pcbddcf->use_qr_single; 8501 ierr = ISCreateGeneral(PETSC_COMM_SELF,pcbddcf->n_vertices,pcbddcf->local_primal_ref_node,PETSC_COPY_VALUES,&change_primal);CHKERRQ(ierr); 8502 change = pcbddcf->ConstraintMatrix; 8503 pcbddcf->ConstraintMatrix = NULL; 8504 8505 /* free unneeded memory allocated in PCBDDCConstraintsSetUp */ 8506 ierr = PetscFree(pcbddcf->sub_schurs);CHKERRQ(ierr); 8507 ierr = MatNullSpaceDestroy(&pcbddcf->onearnullspace);CHKERRQ(ierr); 8508 ierr = PetscFree2(pcbddcf->local_primal_ref_node,pcbddcf->local_primal_ref_mult);CHKERRQ(ierr); 8509 ierr = PetscFree(pcbddcf->primal_indices_local_idxs);CHKERRQ(ierr); 8510 ierr = PetscFree(pcbddcf->onearnullvecs_state);CHKERRQ(ierr); 8511 ierr = PetscFree(pcf->data);CHKERRQ(ierr); 8512 pcf->ops->destroy = NULL; 8513 pcf->ops->reset = NULL; 8514 ierr = PCDestroy(&pcf);CHKERRQ(ierr); 8515 } 8516 if (!pcbddc->use_deluxe_scaling) scaling = pcis->D; 8517 8518 ierr = PetscObjectQuery((PetscObject)pc,"__KSPFETIDP_iP",(PetscObject*)&iP);CHKERRQ(ierr); 8519 if (iP) { 8520 ierr = PetscOptionsBegin(PetscObjectComm((PetscObject)iP),sub_schurs->prefix,"BDDC sub_schurs options","PC");CHKERRQ(ierr); 8521 ierr = PetscOptionsBool("-sub_schurs_discrete_harmonic",NULL,NULL,discrete_harmonic,&discrete_harmonic,NULL);CHKERRQ(ierr); 8522 ierr = PetscOptionsEnd();CHKERRQ(ierr); 8523 } 8524 if (discrete_harmonic) { 8525 Mat A; 8526 ierr = MatDuplicate(pcbddc->local_mat,MAT_COPY_VALUES,&A);CHKERRQ(ierr); 8527 ierr = MatZeroRowsColumnsIS(A,iP,1.0,NULL,NULL);CHKERRQ(ierr); 8528 ierr = PetscObjectCompose((PetscObject)A,"__KSPFETIDP_iP",(PetscObject)iP);CHKERRQ(ierr); 8529 ierr = PCBDDCSubSchursSetUp(sub_schurs,A,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8530 ierr = MatDestroy(&A);CHKERRQ(ierr); 8531 } else { 8532 ierr = PCBDDCSubSchursSetUp(sub_schurs,pcbddc->local_mat,S_j,pcbddc->sub_schurs_exact_schur,used_xadj,used_adjncy,pcbddc->sub_schurs_layers,scaling,pcbddc->adaptive_selection,reuse_solvers,pcbddc->benign_saddle_point,benign_n,pcbddc->benign_p0_lidx,pcbddc->benign_zerodiag_subs,change,change_primal);CHKERRQ(ierr); 8533 } 8534 ierr = MatDestroy(&change);CHKERRQ(ierr); 8535 ierr = ISDestroy(&change_primal);CHKERRQ(ierr); 8536 } 8537 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8538 8539 /* free adjacency */ 8540 if (free_used_adj) { 8541 ierr = PetscFree2(used_xadj,used_adjncy);CHKERRQ(ierr); 8542 } 8543 PetscFunctionReturn(0); 8544 } 8545 8546 PetscErrorCode PCBDDCInitSubSchurs(PC pc) 8547 { 8548 PC_IS *pcis=(PC_IS*)pc->data; 8549 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8550 PCBDDCGraph graph; 8551 PetscErrorCode ierr; 8552 8553 PetscFunctionBegin; 8554 /* attach interface graph for determining subsets */ 8555 if (pcbddc->sub_schurs_rebuild) { /* in case rebuild has been requested, it uses a graph generated only by the neighbouring information */ 8556 IS verticesIS,verticescomm; 8557 PetscInt vsize,*idxs; 8558 8559 ierr = PCBDDCGraphGetCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8560 ierr = ISGetSize(verticesIS,&vsize);CHKERRQ(ierr); 8561 ierr = ISGetIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8562 ierr = ISCreateGeneral(PetscObjectComm((PetscObject)pc),vsize,idxs,PETSC_COPY_VALUES,&verticescomm);CHKERRQ(ierr); 8563 ierr = ISRestoreIndices(verticesIS,(const PetscInt**)&idxs);CHKERRQ(ierr); 8564 ierr = PCBDDCGraphRestoreCandidatesIS(pcbddc->mat_graph,NULL,NULL,NULL,NULL,&verticesIS);CHKERRQ(ierr); 8565 ierr = PCBDDCGraphCreate(&graph);CHKERRQ(ierr); 8566 ierr = PCBDDCGraphInit(graph,pcbddc->mat_graph->l2gmap,pcbddc->mat_graph->nvtxs_global,pcbddc->graphmaxcount);CHKERRQ(ierr); 8567 ierr = PCBDDCGraphSetUp(graph,pcbddc->mat_graph->custom_minimal_size,NULL,pcbddc->DirichletBoundariesLocal,0,NULL,verticescomm);CHKERRQ(ierr); 8568 ierr = ISDestroy(&verticescomm);CHKERRQ(ierr); 8569 ierr = PCBDDCGraphComputeConnectedComponents(graph);CHKERRQ(ierr); 8570 } else { 8571 graph = pcbddc->mat_graph; 8572 } 8573 /* print some info */ 8574 if (pcbddc->dbg_flag && !pcbddc->sub_schurs_rebuild) { 8575 IS vertices; 8576 PetscInt nv,nedges,nfaces; 8577 ierr = PCBDDCGraphASCIIView(graph,pcbddc->dbg_flag,pcbddc->dbg_viewer);CHKERRQ(ierr); 8578 ierr = PCBDDCGraphGetCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8579 ierr = ISGetSize(vertices,&nv);CHKERRQ(ierr); 8580 ierr = PetscViewerASCIIPushSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8581 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"--------------------------------------------------------------\n");CHKERRQ(ierr); 8582 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate vertices (%d)\n",PetscGlobalRank,nv,pcbddc->use_vertices);CHKERRQ(ierr); 8583 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate edges (%d)\n",PetscGlobalRank,nedges,pcbddc->use_edges);CHKERRQ(ierr); 8584 ierr = PetscViewerASCIISynchronizedPrintf(pcbddc->dbg_viewer,"Subdomain %04d got %02d local candidate faces (%d)\n",PetscGlobalRank,nfaces,pcbddc->use_faces);CHKERRQ(ierr); 8585 ierr = PetscViewerFlush(pcbddc->dbg_viewer);CHKERRQ(ierr); 8586 ierr = PetscViewerASCIIPopSynchronized(pcbddc->dbg_viewer);CHKERRQ(ierr); 8587 ierr = PCBDDCGraphRestoreCandidatesIS(graph,&nfaces,NULL,&nedges,NULL,&vertices);CHKERRQ(ierr); 8588 } 8589 8590 /* sub_schurs init */ 8591 if (!pcbddc->sub_schurs) { 8592 ierr = PCBDDCSubSchursCreate(&pcbddc->sub_schurs);CHKERRQ(ierr); 8593 } 8594 ierr = PCBDDCSubSchursInit(pcbddc->sub_schurs,pcis->is_I_local,pcis->is_B_local,graph,pcis->BtoNmap,pcbddc->sub_schurs_rebuild);CHKERRQ(ierr); 8595 pcbddc->sub_schurs->prefix = ((PetscObject)pc)->prefix; 8596 8597 /* free graph struct */ 8598 if (pcbddc->sub_schurs_rebuild) { 8599 ierr = PCBDDCGraphDestroy(&graph);CHKERRQ(ierr); 8600 } 8601 PetscFunctionReturn(0); 8602 } 8603 8604 PetscErrorCode PCBDDCCheckOperator(PC pc) 8605 { 8606 PC_IS *pcis=(PC_IS*)pc->data; 8607 PC_BDDC *pcbddc=(PC_BDDC*)pc->data; 8608 PetscErrorCode ierr; 8609 8610 PetscFunctionBegin; 8611 if (pcbddc->n_vertices == pcbddc->local_primal_size) { 8612 IS zerodiag = NULL; 8613 Mat S_j,B0_B=NULL; 8614 Vec dummy_vec=NULL,vec_check_B,vec_scale_P; 8615 PetscScalar *p0_check,*array,*array2; 8616 PetscReal norm; 8617 PetscInt i; 8618 8619 /* B0 and B0_B */ 8620 if (zerodiag) { 8621 IS dummy; 8622 8623 ierr = ISCreateStride(PETSC_COMM_SELF,pcbddc->benign_n,0,1,&dummy);CHKERRQ(ierr); 8624 ierr = MatCreateSubMatrix(pcbddc->benign_B0,dummy,pcis->is_B_local,MAT_INITIAL_MATRIX,&B0_B);CHKERRQ(ierr); 8625 ierr = MatCreateVecs(B0_B,NULL,&dummy_vec);CHKERRQ(ierr); 8626 ierr = ISDestroy(&dummy);CHKERRQ(ierr); 8627 } 8628 /* I need a primal vector to scale primal nodes since BDDC sums contibutions */ 8629 ierr = VecDuplicate(pcbddc->vec1_P,&vec_scale_P);CHKERRQ(ierr); 8630 ierr = VecSet(pcbddc->vec1_P,1.0);CHKERRQ(ierr); 8631 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8632 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8633 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8634 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,vec_scale_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8635 ierr = VecReciprocal(vec_scale_P);CHKERRQ(ierr); 8636 /* S_j */ 8637 ierr = MatCreateSchurComplement(pcis->A_II,pcis->A_II,pcis->A_IB,pcis->A_BI,pcis->A_BB,&S_j);CHKERRQ(ierr); 8638 ierr = MatSchurComplementSetKSP(S_j,pcbddc->ksp_D);CHKERRQ(ierr); 8639 8640 /* mimic vector in \widetilde{W}_\Gamma */ 8641 ierr = VecSetRandom(pcis->vec1_N,NULL);CHKERRQ(ierr); 8642 /* continuous in primal space */ 8643 ierr = VecSetRandom(pcbddc->coarse_vec,NULL);CHKERRQ(ierr); 8644 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8645 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8646 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8647 ierr = PetscCalloc1(pcbddc->benign_n,&p0_check);CHKERRQ(ierr); 8648 for (i=0;i<pcbddc->benign_n;i++) p0_check[i] = array[pcbddc->local_primal_size-pcbddc->benign_n+i]; 8649 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8650 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8651 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8652 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8653 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8654 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec2_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8655 ierr = VecDuplicate(pcis->vec2_B,&vec_check_B);CHKERRQ(ierr); 8656 ierr = VecCopy(pcis->vec2_B,vec_check_B);CHKERRQ(ierr); 8657 8658 /* assemble rhs for coarse problem */ 8659 /* widetilde{S}_\Gamma w_\Gamma + \widetilde{B0}^T_B p0 */ 8660 /* local with Schur */ 8661 ierr = MatMult(S_j,pcis->vec2_B,pcis->vec1_B);CHKERRQ(ierr); 8662 if (zerodiag) { 8663 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8664 for (i=0;i<pcbddc->benign_n;i++) array[i] = p0_check[i]; 8665 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8666 ierr = MatMultTransposeAdd(B0_B,dummy_vec,pcis->vec1_B,pcis->vec1_B);CHKERRQ(ierr); 8667 } 8668 /* sum on primal nodes the local contributions */ 8669 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8670 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_B,pcis->vec1_N,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8671 ierr = VecGetArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8672 ierr = VecGetArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8673 for (i=0;i<pcbddc->local_primal_size;i++) array2[i] = array[pcbddc->local_primal_ref_node[i]]; 8674 ierr = VecRestoreArray(pcbddc->vec1_P,&array2);CHKERRQ(ierr); 8675 ierr = VecRestoreArray(pcis->vec1_N,&array);CHKERRQ(ierr); 8676 ierr = VecSet(pcbddc->coarse_vec,0.);CHKERRQ(ierr); 8677 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8678 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->vec1_P,pcbddc->coarse_vec,ADD_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8679 ierr = VecScatterBegin(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8680 ierr = VecScatterEnd(pcbddc->coarse_loc_to_glob,pcbddc->coarse_vec,pcbddc->vec1_P,INSERT_VALUES,SCATTER_REVERSE);CHKERRQ(ierr); 8681 ierr = VecGetArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8682 /* scale primal nodes (BDDC sums contibutions) */ 8683 ierr = VecPointwiseMult(pcbddc->vec1_P,vec_scale_P,pcbddc->vec1_P);CHKERRQ(ierr); 8684 ierr = VecSetValues(pcis->vec1_N,pcbddc->local_primal_size,pcbddc->local_primal_ref_node,array,INSERT_VALUES);CHKERRQ(ierr); 8685 ierr = VecRestoreArray(pcbddc->vec1_P,&array);CHKERRQ(ierr); 8686 ierr = VecAssemblyBegin(pcis->vec1_N);CHKERRQ(ierr); 8687 ierr = VecAssemblyEnd(pcis->vec1_N);CHKERRQ(ierr); 8688 ierr = VecScatterBegin(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8689 ierr = VecScatterEnd(pcis->N_to_B,pcis->vec1_N,pcis->vec1_B,INSERT_VALUES,SCATTER_FORWARD);CHKERRQ(ierr); 8690 /* global: \widetilde{B0}_B w_\Gamma */ 8691 if (zerodiag) { 8692 ierr = MatMult(B0_B,pcis->vec2_B,dummy_vec);CHKERRQ(ierr); 8693 ierr = VecGetArray(dummy_vec,&array);CHKERRQ(ierr); 8694 for (i=0;i<pcbddc->benign_n;i++) pcbddc->benign_p0[i] = array[i]; 8695 ierr = VecRestoreArray(dummy_vec,&array);CHKERRQ(ierr); 8696 } 8697 /* BDDC */ 8698 ierr = VecSet(pcis->vec1_D,0.);CHKERRQ(ierr); 8699 ierr = PCBDDCApplyInterfacePreconditioner(pc,PETSC_FALSE);CHKERRQ(ierr); 8700 8701 ierr = VecCopy(pcis->vec1_B,pcis->vec2_B);CHKERRQ(ierr); 8702 ierr = VecAXPY(pcis->vec1_B,-1.0,vec_check_B);CHKERRQ(ierr); 8703 ierr = VecNorm(pcis->vec1_B,NORM_INFINITY,&norm);CHKERRQ(ierr); 8704 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC local error is %1.4e\n",PetscGlobalRank,norm); 8705 for (i=0;i<pcbddc->benign_n;i++) { 8706 PetscPrintf(PETSC_COMM_SELF,"[%d] BDDC p0[%d] error is %1.4e\n",PetscGlobalRank,i,PetscAbsScalar(pcbddc->benign_p0[i]-p0_check[i])); 8707 } 8708 ierr = PetscFree(p0_check);CHKERRQ(ierr); 8709 ierr = VecDestroy(&vec_scale_P);CHKERRQ(ierr); 8710 ierr = VecDestroy(&vec_check_B);CHKERRQ(ierr); 8711 ierr = VecDestroy(&dummy_vec);CHKERRQ(ierr); 8712 ierr = MatDestroy(&S_j);CHKERRQ(ierr); 8713 ierr = MatDestroy(&B0_B);CHKERRQ(ierr); 8714 } 8715 PetscFunctionReturn(0); 8716 } 8717 8718 #include <../src/mat/impls/aij/mpi/mpiaij.h> 8719 PetscErrorCode MatMPIAIJRestrict(Mat A, MPI_Comm ccomm, Mat *B) 8720 { 8721 Mat At; 8722 IS rows; 8723 PetscInt rst,ren; 8724 PetscErrorCode ierr; 8725 PetscLayout rmap; 8726 8727 PetscFunctionBegin; 8728 rst = ren = 0; 8729 if (ccomm != MPI_COMM_NULL) { 8730 ierr = PetscLayoutCreate(ccomm,&rmap);CHKERRQ(ierr); 8731 ierr = PetscLayoutSetSize(rmap,A->rmap->N);CHKERRQ(ierr); 8732 ierr = PetscLayoutSetBlockSize(rmap,1);CHKERRQ(ierr); 8733 ierr = PetscLayoutSetUp(rmap);CHKERRQ(ierr); 8734 ierr = PetscLayoutGetRange(rmap,&rst,&ren);CHKERRQ(ierr); 8735 } 8736 ierr = ISCreateStride(PetscObjectComm((PetscObject)A),ren-rst,rst,1,&rows);CHKERRQ(ierr); 8737 ierr = MatCreateSubMatrix(A,rows,NULL,MAT_INITIAL_MATRIX,&At);CHKERRQ(ierr); 8738 ierr = ISDestroy(&rows);CHKERRQ(ierr); 8739 8740 if (ccomm != MPI_COMM_NULL) { 8741 Mat_MPIAIJ *a,*b; 8742 IS from,to; 8743 Vec gvec; 8744 PetscInt lsize; 8745 8746 ierr = MatCreate(ccomm,B);CHKERRQ(ierr); 8747 ierr = MatSetSizes(*B,ren-rst,PETSC_DECIDE,PETSC_DECIDE,At->cmap->N);CHKERRQ(ierr); 8748 ierr = MatSetType(*B,MATAIJ);CHKERRQ(ierr); 8749 ierr = PetscLayoutDestroy(&((*B)->rmap));CHKERRQ(ierr); 8750 ierr = PetscLayoutSetUp((*B)->cmap);CHKERRQ(ierr); 8751 a = (Mat_MPIAIJ*)At->data; 8752 b = (Mat_MPIAIJ*)(*B)->data; 8753 ierr = MPI_Comm_size(ccomm,&b->size);CHKERRQ(ierr); 8754 ierr = MPI_Comm_rank(ccomm,&b->rank);CHKERRQ(ierr); 8755 ierr = PetscObjectReference((PetscObject)a->A);CHKERRQ(ierr); 8756 ierr = PetscObjectReference((PetscObject)a->B);CHKERRQ(ierr); 8757 b->A = a->A; 8758 b->B = a->B; 8759 8760 b->donotstash = a->donotstash; 8761 b->roworiented = a->roworiented; 8762 b->rowindices = 0; 8763 b->rowvalues = 0; 8764 b->getrowactive = PETSC_FALSE; 8765 8766 (*B)->rmap = rmap; 8767 (*B)->factortype = A->factortype; 8768 (*B)->assembled = PETSC_TRUE; 8769 (*B)->insertmode = NOT_SET_VALUES; 8770 (*B)->preallocated = PETSC_TRUE; 8771 8772 if (a->colmap) { 8773 #if defined(PETSC_USE_CTABLE) 8774 ierr = PetscTableCreateCopy(a->colmap,&b->colmap);CHKERRQ(ierr); 8775 #else 8776 ierr = PetscMalloc1(At->cmap->N,&b->colmap);CHKERRQ(ierr); 8777 ierr = PetscLogObjectMemory((PetscObject)*B,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8778 ierr = PetscMemcpy(b->colmap,a->colmap,At->cmap->N*sizeof(PetscInt));CHKERRQ(ierr); 8779 #endif 8780 } else b->colmap = 0; 8781 if (a->garray) { 8782 PetscInt len; 8783 len = a->B->cmap->n; 8784 ierr = PetscMalloc1(len+1,&b->garray);CHKERRQ(ierr); 8785 ierr = PetscLogObjectMemory((PetscObject)(*B),len*sizeof(PetscInt));CHKERRQ(ierr); 8786 if (len) { ierr = PetscMemcpy(b->garray,a->garray,len*sizeof(PetscInt));CHKERRQ(ierr); } 8787 } else b->garray = 0; 8788 8789 ierr = PetscObjectReference((PetscObject)a->lvec);CHKERRQ(ierr); 8790 b->lvec = a->lvec; 8791 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->lvec);CHKERRQ(ierr); 8792 8793 /* cannot use VecScatterCopy */ 8794 ierr = VecGetLocalSize(b->lvec,&lsize);CHKERRQ(ierr); 8795 ierr = ISCreateGeneral(ccomm,lsize,b->garray,PETSC_USE_POINTER,&from);CHKERRQ(ierr); 8796 ierr = ISCreateStride(PETSC_COMM_SELF,lsize,0,1,&to);CHKERRQ(ierr); 8797 ierr = MatCreateVecs(*B,&gvec,NULL);CHKERRQ(ierr); 8798 ierr = VecScatterCreate(gvec,from,b->lvec,to,&b->Mvctx);CHKERRQ(ierr); 8799 ierr = PetscLogObjectParent((PetscObject)*B,(PetscObject)b->Mvctx);CHKERRQ(ierr); 8800 ierr = ISDestroy(&from);CHKERRQ(ierr); 8801 ierr = ISDestroy(&to);CHKERRQ(ierr); 8802 ierr = VecDestroy(&gvec);CHKERRQ(ierr); 8803 } 8804 ierr = MatDestroy(&At);CHKERRQ(ierr); 8805 PetscFunctionReturn(0); 8806 } 8807